fix delete_by_doc_id
Browse files
- lightrag/kg/json_kv_impl.py +9 -0
- lightrag/kg/tidb_impl.py +8 -0
- lightrag/lightrag.py +25 -8
lightrag/kg/json_kv_impl.py
CHANGED
@@ -44,6 +44,15 @@ class JsonKVStorage(BaseKVStorage):
|
|
44 |
)
|
45 |
write_json(data_dict, self._file_name)
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
48 |
async with self._storage_lock:
|
49 |
return self._data.get(id)
|
|
|
44 |
)
|
45 |
write_json(data_dict, self._file_name)
|
46 |
|
47 |
+
async def get_all(self) -> dict[str, Any]:
|
48 |
+
"""Get all data from storage
|
49 |
+
|
50 |
+
Returns:
|
51 |
+
Dictionary containing all stored data
|
52 |
+
"""
|
53 |
+
async with self._storage_lock:
|
54 |
+
return dict(self._data)
|
55 |
+
|
56 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
57 |
async with self._storage_lock:
|
58 |
return self._data.get(id)
|
lightrag/kg/tidb_impl.py
CHANGED
@@ -174,6 +174,14 @@ class TiDBKVStorage(BaseKVStorage):
|
|
174 |
self.db = None
|
175 |
|
176 |
################ QUERY METHODS ################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
|
178 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
179 |
"""Fetch doc_full data by id."""
|
|
|
174 |
self.db = None
|
175 |
|
176 |
################ QUERY METHODS ################
|
177 |
+
async def get_all(self) -> dict[str, Any]:
|
178 |
+
"""Get all data from storage
|
179 |
+
|
180 |
+
Returns:
|
181 |
+
Dictionary containing all stored data
|
182 |
+
"""
|
183 |
+
async with self._storage_lock:
|
184 |
+
return dict(self._data)
|
185 |
|
186 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
187 |
"""Fetch doc_full data by id."""
|
lightrag/lightrag.py
CHANGED
@@ -1431,14 +1431,22 @@ class LightRAG:
|
|
1431 |
|
1432 |
logger.debug(f"Starting deletion for document {doc_id}")
|
1433 |
|
1434 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1435 |
|
1436 |
-
|
1437 |
-
|
1438 |
-
if not chunks:
|
1439 |
return
|
1440 |
|
1441 |
-
|
|
|
1442 |
logger.debug(f"Found {len(chunk_ids)} chunks to delete")
|
1443 |
|
1444 |
# 3. Before deleting, check the related entities and relationships for these chunks
|
@@ -1626,9 +1634,18 @@ class LightRAG:
|
|
1626 |
logger.warning(f"Document {doc_id} still exists in full_docs")
|
1627 |
|
1628 |
# Verify if chunks have been deleted
|
1629 |
-
|
1630 |
-
|
1631 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1632 |
|
1633 |
# Verify entities and relationships
|
1634 |
for chunk_id in chunk_ids:
|
|
|
1431 |
|
1432 |
logger.debug(f"Starting deletion for document {doc_id}")
|
1433 |
|
1434 |
+
# 2. Get all chunks related to this document
|
1435 |
+
# Find all chunks where full_doc_id equals the current doc_id
|
1436 |
+
all_chunks = await self.text_chunks.get_all()
|
1437 |
+
related_chunks = {
|
1438 |
+
chunk_id: chunk_data
|
1439 |
+
for chunk_id, chunk_data in all_chunks.items()
|
1440 |
+
if isinstance(chunk_data, dict)
|
1441 |
+
and chunk_data.get("full_doc_id") == doc_id
|
1442 |
+
}
|
1443 |
|
1444 |
+
if not related_chunks:
|
1445 |
+
logger.warning(f"No chunks found for document {doc_id}")
|
|
|
1446 |
return
|
1447 |
|
1448 |
+
# Get all related chunk IDs
|
1449 |
+
chunk_ids = set(related_chunks.keys())
|
1450 |
logger.debug(f"Found {len(chunk_ids)} chunks to delete")
|
1451 |
|
1452 |
# 3. Before deleting, check the related entities and relationships for these chunks
|
|
|
1634 |
logger.warning(f"Document {doc_id} still exists in full_docs")
|
1635 |
|
1636 |
# Verify if chunks have been deleted
|
1637 |
+
all_remaining_chunks = await self.text_chunks.get_all()
|
1638 |
+
remaining_related_chunks = {
|
1639 |
+
chunk_id: chunk_data
|
1640 |
+
for chunk_id, chunk_data in all_remaining_chunks.items()
|
1641 |
+
if isinstance(chunk_data, dict)
|
1642 |
+
and chunk_data.get("full_doc_id") == doc_id
|
1643 |
+
}
|
1644 |
+
|
1645 |
+
if remaining_related_chunks:
|
1646 |
+
logger.warning(
|
1647 |
+
f"Found {len(remaining_related_chunks)} remaining chunks"
|
1648 |
+
)
|
1649 |
|
1650 |
# Verify entities and relationships
|
1651 |
for chunk_id in chunk_ids:
|