LarFii committed on
Commit
b1f53fb
·
1 Parent(s): 5bbcf23

fix delete_by_doc_id

Browse files
lightrag/kg/json_kv_impl.py CHANGED
@@ -44,6 +44,15 @@ class JsonKVStorage(BaseKVStorage):
44
  )
45
  write_json(data_dict, self._file_name)
46
 
 
 
 
 
 
 
 
 
 
47
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
48
  async with self._storage_lock:
49
  return self._data.get(id)
 
44
  )
45
  write_json(data_dict, self._file_name)
46
 
47
+ async def get_all(self) -> dict[str, Any]:
48
+ """Get all data from storage
49
+
50
+ Returns:
51
+ Dictionary containing all stored data
52
+ """
53
+ async with self._storage_lock:
54
+ return dict(self._data)
55
+
56
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
57
  async with self._storage_lock:
58
  return self._data.get(id)
lightrag/kg/tidb_impl.py CHANGED
@@ -174,6 +174,14 @@ class TiDBKVStorage(BaseKVStorage):
174
  self.db = None
175
 
176
  ################ QUERY METHODS ################
 
 
 
 
 
 
 
 
177
 
178
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
179
  """Fetch doc_full data by id."""
 
174
  self.db = None
175
 
176
  ################ QUERY METHODS ################
177
+ async def get_all(self) -> dict[str, Any]:
178
+ """Get all data from storage
179
+
180
+ Returns:
181
+ Dictionary containing all stored data
182
+ """
183
+ async with self._storage_lock:
184
+ return dict(self._data)
185
 
186
  async def get_by_id(self, id: str) -> dict[str, Any] | None:
187
  """Fetch doc_full data by id."""
lightrag/lightrag.py CHANGED
@@ -1431,14 +1431,22 @@ class LightRAG:
1431
 
1432
  logger.debug(f"Starting deletion for document {doc_id}")
1433
 
1434
- doc_to_chunk_id = doc_id.replace("doc", "chunk")
 
 
 
 
 
 
 
 
1435
 
1436
- # 2. Get all related chunks
1437
- chunks = await self.text_chunks.get_by_id(doc_to_chunk_id)
1438
- if not chunks:
1439
  return
1440
 
1441
- chunk_ids = {chunks["full_doc_id"].replace("doc", "chunk")}
 
1442
  logger.debug(f"Found {len(chunk_ids)} chunks to delete")
1443
 
1444
  # 3. Before deleting, check the related entities and relationships for these chunks
@@ -1626,9 +1634,18 @@ class LightRAG:
1626
  logger.warning(f"Document {doc_id} still exists in full_docs")
1627
 
1628
  # Verify if chunks have been deleted
1629
- remaining_chunks = await self.text_chunks.get_by_id(doc_to_chunk_id)
1630
- if remaining_chunks:
1631
- logger.warning(f"Found {len(remaining_chunks)} remaining chunks")
 
 
 
 
 
 
 
 
 
1632
 
1633
  # Verify entities and relationships
1634
  for chunk_id in chunk_ids:
 
1431
 
1432
  logger.debug(f"Starting deletion for document {doc_id}")
1433
 
1434
+ # 2. Get all chunks related to this document
1435
+ # Find all chunks where full_doc_id equals the current doc_id
1436
+ all_chunks = await self.text_chunks.get_all()
1437
+ related_chunks = {
1438
+ chunk_id: chunk_data
1439
+ for chunk_id, chunk_data in all_chunks.items()
1440
+ if isinstance(chunk_data, dict)
1441
+ and chunk_data.get("full_doc_id") == doc_id
1442
+ }
1443
 
1444
+ if not related_chunks:
1445
+ logger.warning(f"No chunks found for document {doc_id}")
 
1446
  return
1447
 
1448
+ # Get all related chunk IDs
1449
+ chunk_ids = set(related_chunks.keys())
1450
  logger.debug(f"Found {len(chunk_ids)} chunks to delete")
1451
 
1452
  # 3. Before deleting, check the related entities and relationships for these chunks
 
1634
  logger.warning(f"Document {doc_id} still exists in full_docs")
1635
 
1636
  # Verify if chunks have been deleted
1637
+ all_remaining_chunks = await self.text_chunks.get_all()
1638
+ remaining_related_chunks = {
1639
+ chunk_id: chunk_data
1640
+ for chunk_id, chunk_data in all_remaining_chunks.items()
1641
+ if isinstance(chunk_data, dict)
1642
+ and chunk_data.get("full_doc_id") == doc_id
1643
+ }
1644
+
1645
+ if remaining_related_chunks:
1646
+ logger.warning(
1647
+ f"Found {len(remaining_related_chunks)} remaining chunks"
1648
+ )
1649
 
1650
  # Verify entities and relationships
1651
  for chunk_id in chunk_ids: