Update delete_by_doc_id
Browse files- lightrag/lightrag.py +52 -40
lightrag/lightrag.py
CHANGED
@@ -1555,51 +1555,57 @@ class LightRAG:
|
|
1555 |
await self.text_chunks.delete(chunk_ids)
|
1556 |
|
1557 |
# 5. Find and process entities and relationships that have these chunks as source
|
1558 |
-
# Get all nodes
|
1559 |
-
nodes = self.chunk_entity_relation_graph._graph.nodes(data=True)
|
1560 |
-
edges = self.chunk_entity_relation_graph._graph.edges(data=True)
|
1561 |
-
|
1562 |
-
# Track which entities and relationships need to be deleted or updated
|
1563 |
entities_to_delete = set()
|
1564 |
entities_to_update = {} # entity_name -> new_source_id
|
1565 |
relationships_to_delete = set()
|
1566 |
relationships_to_update = {} # (src, tgt) -> new_source_id
|
1567 |
|
1568 |
-
# Process entities
|
1569 |
-
|
1570 |
-
|
|
|
|
|
1571 |
# Split source_id using GRAPH_FIELD_SEP
|
1572 |
-
sources = set(
|
1573 |
sources.difference_update(chunk_ids)
|
1574 |
if not sources:
|
1575 |
-
entities_to_delete.add(
|
1576 |
logger.debug(
|
1577 |
-
f"Entity {
|
1578 |
)
|
1579 |
else:
|
1580 |
new_source_id = GRAPH_FIELD_SEP.join(sources)
|
1581 |
-
entities_to_update[
|
1582 |
logger.debug(
|
1583 |
-
f"Entity {
|
1584 |
)
|
1585 |
|
1586 |
# Process relationships
|
1587 |
-
for
|
1588 |
-
|
1589 |
-
|
1590 |
-
|
1591 |
-
|
1592 |
-
|
1593 |
-
|
1594 |
-
|
1595 |
-
f"Relationship {src}-{tgt} marked for deletion - no remaining sources"
|
1596 |
-
)
|
1597 |
-
else:
|
1598 |
-
new_source_id = GRAPH_FIELD_SEP.join(sources)
|
1599 |
-
relationships_to_update[(src, tgt)] = new_source_id
|
1600 |
-
logger.debug(
|
1601 |
-
f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}"
|
1602 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1603 |
|
1604 |
# Delete entities
|
1605 |
if entities_to_delete:
|
@@ -1613,12 +1619,15 @@ class LightRAG:
|
|
1613 |
|
1614 |
# Update entities
|
1615 |
for entity, new_source_id in entities_to_update.items():
|
1616 |
-
node_data = self.chunk_entity_relation_graph.
|
1617 |
-
node_data
|
1618 |
-
|
1619 |
-
|
1620 |
-
|
1621 |
-
|
|
|
|
|
|
|
1622 |
|
1623 |
# Delete relationships
|
1624 |
if relationships_to_delete:
|
@@ -1636,12 +1645,15 @@ class LightRAG:
|
|
1636 |
|
1637 |
# Update relationships
|
1638 |
for (src, tgt), new_source_id in relationships_to_update.items():
|
1639 |
-
edge_data = self.chunk_entity_relation_graph.
|
1640 |
-
edge_data
|
1641 |
-
|
1642 |
-
|
1643 |
-
|
1644 |
-
|
|
|
|
|
|
|
1645 |
|
1646 |
# 6. Delete original document and status
|
1647 |
await self.full_docs.delete([doc_id])
|
|
|
1555 |
await self.text_chunks.delete(chunk_ids)
|
1556 |
|
1557 |
# 5. Find and process entities and relationships that have these chunks as source
|
1558 |
+
# Get all nodes and edges from the graph storage using storage-agnostic methods
|
|
|
|
|
|
|
|
|
1559 |
entities_to_delete = set()
|
1560 |
entities_to_update = {} # entity_name -> new_source_id
|
1561 |
relationships_to_delete = set()
|
1562 |
relationships_to_update = {} # (src, tgt) -> new_source_id
|
1563 |
|
1564 |
+
# Process entities - use storage-agnostic methods
|
1565 |
+
all_labels = await self.chunk_entity_relation_graph.get_all_labels()
|
1566 |
+
for node_label in all_labels:
|
1567 |
+
node_data = await self.chunk_entity_relation_graph.get_node(node_label)
|
1568 |
+
if node_data and "source_id" in node_data:
|
1569 |
# Split source_id using GRAPH_FIELD_SEP
|
1570 |
+
sources = set(node_data["source_id"].split(GRAPH_FIELD_SEP))
|
1571 |
sources.difference_update(chunk_ids)
|
1572 |
if not sources:
|
1573 |
+
entities_to_delete.add(node_label)
|
1574 |
logger.debug(
|
1575 |
+
f"Entity {node_label} marked for deletion - no remaining sources"
|
1576 |
)
|
1577 |
else:
|
1578 |
new_source_id = GRAPH_FIELD_SEP.join(sources)
|
1579 |
+
entities_to_update[node_label] = new_source_id
|
1580 |
logger.debug(
|
1581 |
+
f"Entity {node_label} will be updated with new source_id: {new_source_id}"
|
1582 |
)
|
1583 |
|
1584 |
# Process relationships
|
1585 |
+
for node_label in all_labels:
|
1586 |
+
node_edges = await self.chunk_entity_relation_graph.get_node_edges(
|
1587 |
+
node_label
|
1588 |
+
)
|
1589 |
+
if node_edges:
|
1590 |
+
for src, tgt in node_edges:
|
1591 |
+
edge_data = await self.chunk_entity_relation_graph.get_edge(
|
1592 |
+
src, tgt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1593 |
)
|
1594 |
+
if edge_data and "source_id" in edge_data:
|
1595 |
+
# Split source_id using GRAPH_FIELD_SEP
|
1596 |
+
sources = set(edge_data["source_id"].split(GRAPH_FIELD_SEP))
|
1597 |
+
sources.difference_update(chunk_ids)
|
1598 |
+
if not sources:
|
1599 |
+
relationships_to_delete.add((src, tgt))
|
1600 |
+
logger.debug(
|
1601 |
+
f"Relationship {src}-{tgt} marked for deletion - no remaining sources"
|
1602 |
+
)
|
1603 |
+
else:
|
1604 |
+
new_source_id = GRAPH_FIELD_SEP.join(sources)
|
1605 |
+
relationships_to_update[(src, tgt)] = new_source_id
|
1606 |
+
logger.debug(
|
1607 |
+
f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}"
|
1608 |
+
)
|
1609 |
|
1610 |
# Delete entities
|
1611 |
if entities_to_delete:
|
|
|
1619 |
|
1620 |
# Update entities
|
1621 |
for entity, new_source_id in entities_to_update.items():
|
1622 |
+
node_data = await self.chunk_entity_relation_graph.get_node(entity)
|
1623 |
+
if node_data:
|
1624 |
+
node_data["source_id"] = new_source_id
|
1625 |
+
await self.chunk_entity_relation_graph.upsert_node(
|
1626 |
+
entity, node_data
|
1627 |
+
)
|
1628 |
+
logger.debug(
|
1629 |
+
f"Updated entity {entity} with new source_id: {new_source_id}"
|
1630 |
+
)
|
1631 |
|
1632 |
# Delete relationships
|
1633 |
if relationships_to_delete:
|
|
|
1645 |
|
1646 |
# Update relationships
|
1647 |
for (src, tgt), new_source_id in relationships_to_update.items():
|
1648 |
+
edge_data = await self.chunk_entity_relation_graph.get_edge(src, tgt)
|
1649 |
+
if edge_data:
|
1650 |
+
edge_data["source_id"] = new_source_id
|
1651 |
+
await self.chunk_entity_relation_graph.upsert_edge(
|
1652 |
+
src, tgt, edge_data
|
1653 |
+
)
|
1654 |
+
logger.debug(
|
1655 |
+
f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
|
1656 |
+
)
|
1657 |
|
1658 |
# 6. Delete original document and status
|
1659 |
await self.full_docs.delete([doc_id])
|