LarFii commited on
Commit
27a0799
·
1 Parent(s): d8f3d9c

Update delete_by_doc_id

Browse files
Files changed (1) hide show
  1. lightrag/lightrag.py +52 -40
lightrag/lightrag.py CHANGED
@@ -1555,51 +1555,57 @@ class LightRAG:
1555
  await self.text_chunks.delete(chunk_ids)
1556
 
1557
  # 5. Find and process entities and relationships that have these chunks as source
1558
- # Get all nodes in the graph
1559
- nodes = self.chunk_entity_relation_graph._graph.nodes(data=True)
1560
- edges = self.chunk_entity_relation_graph._graph.edges(data=True)
1561
-
1562
- # Track which entities and relationships need to be deleted or updated
1563
  entities_to_delete = set()
1564
  entities_to_update = {} # entity_name -> new_source_id
1565
  relationships_to_delete = set()
1566
  relationships_to_update = {} # (src, tgt) -> new_source_id
1567
 
1568
- # Process entities
1569
- for node, data in nodes:
1570
- if "source_id" in data:
 
 
1571
  # Split source_id using GRAPH_FIELD_SEP
1572
- sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
1573
  sources.difference_update(chunk_ids)
1574
  if not sources:
1575
- entities_to_delete.add(node)
1576
  logger.debug(
1577
- f"Entity {node} marked for deletion - no remaining sources"
1578
  )
1579
  else:
1580
  new_source_id = GRAPH_FIELD_SEP.join(sources)
1581
- entities_to_update[node] = new_source_id
1582
  logger.debug(
1583
- f"Entity {node} will be updated with new source_id: {new_source_id}"
1584
  )
1585
 
1586
  # Process relationships
1587
- for src, tgt, data in edges:
1588
- if "source_id" in data:
1589
- # Split source_id using GRAPH_FIELD_SEP
1590
- sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
1591
- sources.difference_update(chunk_ids)
1592
- if not sources:
1593
- relationships_to_delete.add((src, tgt))
1594
- logger.debug(
1595
- f"Relationship {src}-{tgt} marked for deletion - no remaining sources"
1596
- )
1597
- else:
1598
- new_source_id = GRAPH_FIELD_SEP.join(sources)
1599
- relationships_to_update[(src, tgt)] = new_source_id
1600
- logger.debug(
1601
- f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}"
1602
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1603
 
1604
  # Delete entities
1605
  if entities_to_delete:
@@ -1613,12 +1619,15 @@ class LightRAG:
1613
 
1614
  # Update entities
1615
  for entity, new_source_id in entities_to_update.items():
1616
- node_data = self.chunk_entity_relation_graph._graph.nodes[entity]
1617
- node_data["source_id"] = new_source_id
1618
- await self.chunk_entity_relation_graph.upsert_node(entity, node_data)
1619
- logger.debug(
1620
- f"Updated entity {entity} with new source_id: {new_source_id}"
1621
- )
 
 
 
1622
 
1623
  # Delete relationships
1624
  if relationships_to_delete:
@@ -1636,12 +1645,15 @@ class LightRAG:
1636
 
1637
  # Update relationships
1638
  for (src, tgt), new_source_id in relationships_to_update.items():
1639
- edge_data = self.chunk_entity_relation_graph._graph.edges[src, tgt]
1640
- edge_data["source_id"] = new_source_id
1641
- await self.chunk_entity_relation_graph.upsert_edge(src, tgt, edge_data)
1642
- logger.debug(
1643
- f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
1644
- )
 
 
 
1645
 
1646
  # 6. Delete original document and status
1647
  await self.full_docs.delete([doc_id])
 
1555
  await self.text_chunks.delete(chunk_ids)
1556
 
1557
  # 5. Find and process entities and relationships that have these chunks as source
1558
+ # Get all nodes and edges from the graph storage using storage-agnostic methods
 
 
 
 
1559
  entities_to_delete = set()
1560
  entities_to_update = {} # entity_name -> new_source_id
1561
  relationships_to_delete = set()
1562
  relationships_to_update = {} # (src, tgt) -> new_source_id
1563
 
1564
+ # Process entities - use storage-agnostic methods
1565
+ all_labels = await self.chunk_entity_relation_graph.get_all_labels()
1566
+ for node_label in all_labels:
1567
+ node_data = await self.chunk_entity_relation_graph.get_node(node_label)
1568
+ if node_data and "source_id" in node_data:
1569
  # Split source_id using GRAPH_FIELD_SEP
1570
+ sources = set(node_data["source_id"].split(GRAPH_FIELD_SEP))
1571
  sources.difference_update(chunk_ids)
1572
  if not sources:
1573
+ entities_to_delete.add(node_label)
1574
  logger.debug(
1575
+ f"Entity {node_label} marked for deletion - no remaining sources"
1576
  )
1577
  else:
1578
  new_source_id = GRAPH_FIELD_SEP.join(sources)
1579
+ entities_to_update[node_label] = new_source_id
1580
  logger.debug(
1581
+ f"Entity {node_label} will be updated with new source_id: {new_source_id}"
1582
  )
1583
 
1584
  # Process relationships
1585
+ for node_label in all_labels:
1586
+ node_edges = await self.chunk_entity_relation_graph.get_node_edges(
1587
+ node_label
1588
+ )
1589
+ if node_edges:
1590
+ for src, tgt in node_edges:
1591
+ edge_data = await self.chunk_entity_relation_graph.get_edge(
1592
+ src, tgt
 
 
 
 
 
 
 
1593
  )
1594
+ if edge_data and "source_id" in edge_data:
1595
+ # Split source_id using GRAPH_FIELD_SEP
1596
+ sources = set(edge_data["source_id"].split(GRAPH_FIELD_SEP))
1597
+ sources.difference_update(chunk_ids)
1598
+ if not sources:
1599
+ relationships_to_delete.add((src, tgt))
1600
+ logger.debug(
1601
+ f"Relationship {src}-{tgt} marked for deletion - no remaining sources"
1602
+ )
1603
+ else:
1604
+ new_source_id = GRAPH_FIELD_SEP.join(sources)
1605
+ relationships_to_update[(src, tgt)] = new_source_id
1606
+ logger.debug(
1607
+ f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}"
1608
+ )
1609
 
1610
  # Delete entities
1611
  if entities_to_delete:
 
1619
 
1620
  # Update entities
1621
  for entity, new_source_id in entities_to_update.items():
1622
+ node_data = await self.chunk_entity_relation_graph.get_node(entity)
1623
+ if node_data:
1624
+ node_data["source_id"] = new_source_id
1625
+ await self.chunk_entity_relation_graph.upsert_node(
1626
+ entity, node_data
1627
+ )
1628
+ logger.debug(
1629
+ f"Updated entity {entity} with new source_id: {new_source_id}"
1630
+ )
1631
 
1632
  # Delete relationships
1633
  if relationships_to_delete:
 
1645
 
1646
  # Update relationships
1647
  for (src, tgt), new_source_id in relationships_to_update.items():
1648
+ edge_data = await self.chunk_entity_relation_graph.get_edge(src, tgt)
1649
+ if edge_data:
1650
+ edge_data["source_id"] = new_source_id
1651
+ await self.chunk_entity_relation_graph.upsert_edge(
1652
+ src, tgt, edge_data
1653
+ )
1654
+ logger.debug(
1655
+ f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
1656
+ )
1657
 
1658
  # 6. Delete original document and status
1659
  await self.full_docs.delete([doc_id])