yangdx committed
Commit eaa7c14 · 1 Parent(s): fc6211e

Add delete support to all storage implementations

lightrag/base.py CHANGED
@@ -153,15 +153,33 @@ class BaseVectorStorage(StorageNameSpace, ABC):
 
    @abstractmethod
    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
-        """Insert or update vectors in the storage."""
+        """Insert or update vectors in the storage.
+
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
 
    @abstractmethod
    async def delete_entity(self, entity_name: str) -> None:
-        """Delete a single entity by its name."""
+        """Delete a single entity by its name.
+
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
 
    @abstractmethod
    async def delete_entity_relation(self, entity_name: str) -> None:
-        """Delete relations for a given entity."""
+        """Delete relations for a given entity.
+
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
 
    @abstractmethod
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
@@ -187,6 +205,19 @@ class BaseVectorStorage(StorageNameSpace, ABC):
        """
        pass
 
+    @abstractmethod
+    async def delete(self, ids: list[str]):
+        """Delete vectors with specified IDs
+
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+
+        Args:
+            ids: List of vector IDs to be deleted
+        """
+
 
 @dataclass
 class BaseKVStorage(StorageNameSpace, ABC):
@@ -206,16 +237,20 @@ class BaseKVStorage(StorageNameSpace, ABC):
 
    @abstractmethod
    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
-        """Upsert data"""
+        """Upsert data
+
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Update flags to notify other processes that data persistence is needed
+        """
 
    @abstractmethod
    async def delete(self, ids: list[str]) -> None:
        """Delete specific records from storage by their IDs
 
-        This method will:
-        1. Remove the specified records from in-memory storage
-        2. For in-memory DB, update flags to notify other processes that data persistence is needed
-        3. For in-memory DB, changes will be persisted to disk during the next index_done_callback
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Update flags to notify other processes that data persistence is needed
 
        Args:
            ids (list[str]): List of document IDs to be deleted from storage
@@ -267,7 +302,14 @@ class BaseGraphStorage(StorageNameSpace, ABC):
    async def upsert_edge(
        self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
    ) -> None:
-        """Delete a node from the graph."""
+        """Insert or update an edge in the graph.
+
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
+
 
    @abstractmethod
    async def delete_node(self, node_id: str) -> None:
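
The abstract methods above pin down one contract for every in-memory backend: mutate the in-memory structures immediately, raise the cross-process update flags, and defer disk persistence to the next index_done_callback. A minimal standalone sketch of that contract, using a hypothetical InMemoryKVSketch class rather than any real LightRAG storage:

import asyncio
import json
from typing import Any


class InMemoryKVSketch:
    """Hypothetical KV store illustrating the delete/upsert/persist-later contract."""

    def __init__(self, path: str = "kv_store_sketch.json") -> None:
        self._path = path
        self._data: dict[str, dict[str, Any]] = {}
        self._dirty = False  # stands in for the cross-process "update flag"

    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
        if not data:
            return
        self._data.update(data)   # in-memory change only
        self._dirty = True        # persistence happens later, not here

    async def delete(self, ids: list[str]) -> None:
        for doc_id in ids:
            self._data.pop(doc_id, None)
        self._dirty = True

    async def index_done_callback(self) -> None:
        if self._dirty:
            with open(self._path, "w", encoding="utf-8") as f:
                json.dump(self._data, f)
            self._dirty = False


async def _demo() -> None:
    kv = InMemoryKVSketch()
    await kv.upsert({"doc-1": {"text": "hello"}, "doc-2": {"text": "world"}})
    await kv.delete(["doc-1"])
    await kv.index_done_callback()  # only now is the deletion written to disk


if __name__ == "__main__":
    asyncio.run(_demo())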
lightrag/kg/faiss_impl.py CHANGED
@@ -217,6 +217,11 @@ class FaissVectorDBStorage(BaseVectorStorage):
    async def delete(self, ids: list[str]):
        """
        Delete vectors for the provided custom IDs.
+
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
        """
        logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
        to_remove = []
@@ -232,13 +237,22 @@
        )
 
    async def delete_entity(self, entity_name: str) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
        entity_id = compute_mdhash_id(entity_name, prefix="ent-")
        logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
        await self.delete([entity_id])
 
    async def delete_entity_relation(self, entity_name: str) -> None:
        """
-        Delete relations for a given entity by scanning metadata.
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
        """
        logger.debug(f"Searching relations for entity {entity_name}")
        relations = []
@@ -433,6 +447,12 @@
    async def drop(self) -> dict[str, str]:
        """Drop all vector data from storage and clean up resources
 
+        This method will:
+        1. Remove the vector database storage file if it exists
+        2. Reinitialize the vector database client
+        3. Update flags to notify other processes
+        4. Changes are persisted to disk immediately
+
        This method will remove all vectors from the Faiss index and delete the storage files.
 
        Returns:
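
delete_entity above works by recomputing the same deterministic ID that was used when the entity vector was upserted, then delegating to delete(). A small sketch of that ID scheme, assuming compute_mdhash_id is essentially a prefixed MD5 hex digest (the real helper is not shown in this diff):

from hashlib import md5


def compute_mdhash_id_sketch(content: str, prefix: str = "") -> str:
    # assumption: a stable content hash plus a namespace prefix, mirroring compute_mdhash_id
    return prefix + md5(content.encode()).hexdigest()


entity_name = "Alan Turing"
entity_id = compute_mdhash_id_sketch(entity_name, prefix="ent-")
print(entity_id)  # the same "ent-..." ID is produced at upsert time and at delete time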
lightrag/kg/json_doc_status_impl.py CHANGED
@@ -109,6 +109,11 @@ class JsonDocStatusStorage(DocStatusStorage):
        await clear_all_update_flags(self.namespace)
 
    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
+        """
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Update flags to notify other processes that data persistence is needed
+        """
        if not data:
            return
        logger.info(f"Inserting {len(data)} records to {self.namespace}")
@@ -125,10 +130,9 @@ class JsonDocStatusStorage(DocStatusStorage):
    async def delete(self, doc_ids: list[str]):
        """Delete specific records from storage by their IDs
 
-        This method will:
-        1. Remove the specified records from in-memory storage
-        2. Update flags to notify other processes that data persistence is needed
-        3. The changes will be persisted to disk during the next index_done_callback
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Update flags to notify other processes that data persistence is needed
 
        Args:
            ids (list[str]): List of document IDs to be deleted from storage
lightrag/kg/json_kv_impl.py CHANGED
@@ -114,6 +114,11 @@ class JsonKVStorage(BaseKVStorage):
        return set(keys) - set(self._data.keys())
 
    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
+        """
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Update flags to notify other processes that data persistence is needed
+        """
        if not data:
            return
        logger.info(f"Inserting {len(data)} records to {self.namespace}")
@@ -124,10 +129,9 @@ class JsonKVStorage(BaseKVStorage):
    async def delete(self, ids: list[str]) -> None:
        """Delete specific records from storage by their IDs
 
-        This method will:
-        1. Remove the specified records from in-memory storage
-        2. Update flags to notify other processes that data persistence is needed
-        3. The changes will be persisted to disk during the next index_done_callback
+        Important notes for in-memory storage:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Update flags to notify other processes that data persistence is needed
 
        Args:
            ids (list[str]): List of document IDs to be deleted from storage
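
Both JSON-backed stores describe the same cross-process signal: after an in-memory upsert or delete, the writer marks the namespace dirty so other workers know to re-read from disk once persistence happens. A rough standalone sketch of that flag idea with a multiprocessing.Manager dict; the real helpers (such as clear_all_update_flags seen in the context above) are not reproduced here:

from multiprocessing import Manager


def main() -> None:
    manager = Manager()
    update_flags = manager.dict()  # shared between worker processes

    namespace = "json_kv_demo"     # illustrative namespace name

    # writer process: mutate the in-memory data, then raise the flag
    update_flags[namespace] = True

    # reader process: check the flag and reload the JSON file before serving queries
    if update_flags.get(namespace):
        print(f"{namespace} changed elsewhere; reload it from disk")
        update_flags[namespace] = False  # roughly what clearing the update flag does


if __name__ == "__main__":
    main()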
lightrag/kg/nano_vector_db_impl.py CHANGED
@@ -78,6 +78,13 @@ class NanoVectorDBStorage(BaseVectorStorage):
        return self._client
 
    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
+
        logger.info(f"Inserting {len(data)} to {self.namespace}")
        if not data:
            return
@@ -146,6 +153,11 @@ class NanoVectorDBStorage(BaseVectorStorage):
    async def delete(self, ids: list[str]):
        """Delete vectors with specified IDs
 
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+
        Args:
            ids: List of vector IDs to be deleted
        """
@@ -159,6 +171,13 @@ class NanoVectorDBStorage(BaseVectorStorage):
            logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
 
    async def delete_entity(self, entity_name: str) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
+
        try:
            entity_id = compute_mdhash_id(entity_name, prefix="ent-")
            logger.debug(
@@ -176,6 +195,13 @@ class NanoVectorDBStorage(BaseVectorStorage):
            logger.error(f"Error deleting entity {entity_name}: {e}")
 
    async def delete_entity_relation(self, entity_name: str) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
+
        try:
            client = await self._get_client()
            storage = getattr(client, "_NanoVectorDB__storage")
@@ -288,7 +314,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
        1. Remove the vector database storage file if it exists
        2. Reinitialize the vector database client
        3. Update flags to notify other processes
-        4. Trigger index_done_callback to save the empty state
+        4. Changes are persisted to disk immediately
+
+        This method is intended for use in scenarios where all data needs to be removed.
 
        Returns:
            dict[str, str]: Operation status and message
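
Note the asymmetry the updated docstrings spell out: delete()/upsert() only mutate the in-memory client and wait for index_done_callback, while drop() removes the storage file and persists the empty state immediately. A condensed sketch of that drop() shape, with stand-in names rather than the NanoVectorDB implementation:

import os


def drop_sketch(storage_file: str) -> dict[str, str]:
    """Illustrative only: remove the file now and report status as a dict[str, str]."""
    try:
        if os.path.exists(storage_file):
            os.remove(storage_file)  # 1. remove the storage file right away
        # 2. a real implementation would reinitialize its vector client here
        # 3. ...and set the cross-process update flags so other workers reload
        return {"status": "success", "message": "data dropped"}
    except Exception as e:
        return {"status": "error", "message": str(e)}


print(drop_sketch("vdb_entities_sketch.json"))  # file name is illustrative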
lightrag/kg/networkx_impl.py CHANGED
@@ -156,16 +156,34 @@ class NetworkXStorage(BaseGraphStorage):
        return None
 
    async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
        graph = await self._get_graph()
        graph.add_node(node_id, **node_data)
 
    async def upsert_edge(
        self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
    ) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
        graph = await self._get_graph()
        graph.add_edge(source_node_id, target_node_id, **edge_data)
 
    async def delete_node(self, node_id: str) -> None:
+        """
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+        """
        graph = await self._get_graph()
        if graph.has_node(node_id):
            graph.remove_node(node_id)
@@ -173,6 +191,7 @@ class NetworkXStorage(BaseGraphStorage):
        else:
            logger.warning(f"Node {node_id} not found in the graph for deletion.")
 
+    # TODO: NOT USED
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
@@ -193,6 +212,11 @@ class NetworkXStorage(BaseGraphStorage):
    async def remove_nodes(self, nodes: list[str]):
        """Delete multiple nodes
 
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+
        Args:
            nodes: List of node IDs to be deleted
        """
@@ -204,6 +228,11 @@ class NetworkXStorage(BaseGraphStorage):
    async def remove_edges(self, edges: list[tuple[str, str]]):
        """Delete multiple edges
 
+        Important notes:
+        1. Changes will be persisted to disk during the next index_done_callback
+        2. Only one process should update the storage at a time before index_done_callback;
+           the KG-storage lock should be used to avoid data corruption
+
        Args:
            edges: List of edges to be deleted, each edge is a (source, target) tuple
        """
@@ -433,7 +462,7 @@ class NetworkXStorage(BaseGraphStorage):
        1. Remove the graph storage file if it exists
        2. Reset the graph to an empty state
        3. Update flags to notify other processes
-        4. Trigger index_done_callback to save the empty state
+        4. Changes are persisted to disk immediately
 
        Returns:
            dict[str, str]: Operation status and message
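
For the NetworkX backend, delete_node and remove_nodes lean on networkx itself to keep the graph consistent: removing a node also removes every edge incident to it. A standalone networkx example (not LightRAG code) showing that behavior:

import networkx as nx

g = nx.Graph()
g.add_node("A", entity_type="person")
g.add_node("B", entity_type="organization")
g.add_edge("A", "B", description="works at")

g.remove_node("A")           # what delete_node does under the hood
print(g.has_edge("A", "B"))  # False: the incident edge is gone too
print(list(g.nodes))         # ['B']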
lightrag/kg/oracle_impl.py CHANGED
@@ -392,6 +392,33 @@ class OracleKVStorage(BaseKVStorage):
        # Oracle handles persistence automatically
        pass
 
+    async def delete(self, ids: list[str]) -> dict[str, str]:
+        """Delete records with specified IDs from the storage.
+
+        Args:
+            ids: List of record IDs to be deleted
+
+        Returns:
+            Dictionary with status and message
+        """
+        if not ids:
+            return {"status": "success", "message": "No IDs provided for deletion"}
+
+        try:
+            table_name = namespace_to_table_name(self.namespace)
+            if not table_name:
+                return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
+
+            ids_list = ",".join([f"'{id}'" for id in ids])
+            delete_sql = f"DELETE FROM {table_name} WHERE workspace=:workspace AND id IN ({ids_list})"
+
+            await self.db.execute(delete_sql, {"workspace": self.db.workspace})
+            logger.info(f"Successfully deleted {len(ids)} records from {self.namespace}")
+            return {"status": "success", "message": f"Successfully deleted {len(ids)} records"}
+        except Exception as e:
+            logger.error(f"Error deleting records from {self.namespace}: {e}")
+            return {"status": "error", "message": str(e)}
+
    async def drop(self) -> dict[str, str]:
        """Drop the storage"""
        try:
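
The new Oracle delete() builds its IN clause by quoting each ID into the SQL string and binds only :workspace. An alternative sketch that binds every ID as a named parameter as well, assuming db.execute(sql, params) accepts a dict of named binds exactly as in the calls above; the table name and placeholder names here are illustrative:

def build_delete_with_binds(table_name: str, ids: list[str], workspace: str):
    # one named placeholder per ID instead of quoting the values into the SQL text
    placeholders = ",".join(f":id{i}" for i in range(len(ids)))
    sql = f"DELETE FROM {table_name} WHERE workspace=:workspace AND id IN ({placeholders})"
    params = {f"id{i}": value for i, value in enumerate(ids)}
    params["workspace"] = workspace
    return sql, params


sql, params = build_delete_with_binds("MY_KV_TABLE", ["doc-1", "doc-2"], "default")
print(sql)     # DELETE FROM MY_KV_TABLE WHERE workspace=:workspace AND id IN (:id0,:id1)
print(params)  # {'id0': 'doc-1', 'id1': 'doc-2', 'workspace': 'default'}
# usage sketch: await self.db.execute(sql, params)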
lightrag/kg/tidb_impl.py CHANGED
@@ -278,6 +278,35 @@ class TiDBKVStorage(BaseKVStorage):
        # Ti handles persistence automatically
        pass
 
+    async def delete(self, ids: list[str]) -> dict[str, str]:
+        """Delete records with specified IDs from the storage.
+
+        Args:
+            ids: List of record IDs to be deleted
+
+        Returns:
+            Dictionary with status and message
+        """
+        if not ids:
+            return {"status": "success", "message": "No IDs provided for deletion"}
+
+        try:
+            table_name = namespace_to_table_name(self.namespace)
+            id_field = namespace_to_id(self.namespace)
+
+            if not table_name or not id_field:
+                return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
+
+            ids_list = ",".join([f"'{id}'" for id in ids])
+            delete_sql = f"DELETE FROM {table_name} WHERE workspace = :workspace AND {id_field} IN ({ids_list})"
+
+            await self.db.execute(delete_sql, {"workspace": self.db.workspace})
+            logger.info(f"Successfully deleted {len(ids)} records from {self.namespace}")
+            return {"status": "success", "message": f"Successfully deleted {len(ids)} records"}
+        except Exception as e:
+            logger.error(f"Error deleting records from {self.namespace}: {e}")
+            return {"status": "error", "message": str(e)}
+
    async def drop(self) -> dict[str, str]:
        """Drop the storage"""
        try:
@@ -421,11 +450,66 @@ class TiDBVectorDBStorage(BaseVectorStorage):
        params = {"workspace": self.db.workspace, "status": status}
        return await self.db.query(SQL, params, multirows=True)
 
+    async def delete(self, ids: list[str]) -> None:
+        """Delete vectors with specified IDs from the storage.
+
+        Args:
+            ids: List of vector IDs to be deleted
+        """
+        if not ids:
+            return
+
+        table_name = namespace_to_table_name(self.namespace)
+        id_field = namespace_to_id(self.namespace)
+
+        if not table_name or not id_field:
+            logger.error(f"Unknown namespace for vector deletion: {self.namespace}")
+            return
+
+        ids_list = ",".join([f"'{id}'" for id in ids])
+        delete_sql = f"DELETE FROM {table_name} WHERE workspace = :workspace AND {id_field} IN ({ids_list})"
+
+        try:
+            await self.db.execute(delete_sql, {"workspace": self.db.workspace})
+            logger.debug(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
+        except Exception as e:
+            logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
+
    async def delete_entity(self, entity_name: str) -> None:
-        raise NotImplementedError
+        """Delete an entity by its name from the vector storage.
+
+        Args:
+            entity_name: The name of the entity to delete
+        """
+        try:
+            # Construct SQL to delete the entity
+            delete_sql = """DELETE FROM LIGHTRAG_GRAPH_NODES
+                        WHERE workspace = :workspace AND name = :entity_name"""
+
+            await self.db.execute(
+                delete_sql, {"workspace": self.db.workspace, "entity_name": entity_name}
+            )
+            logger.debug(f"Successfully deleted entity {entity_name}")
+        except Exception as e:
+            logger.error(f"Error deleting entity {entity_name}: {e}")
 
    async def delete_entity_relation(self, entity_name: str) -> None:
-        raise NotImplementedError
+        """Delete all relations associated with an entity.
+
+        Args:
+            entity_name: The name of the entity whose relations should be deleted
+        """
+        try:
+            # Delete relations where the entity is either the source or target
+            delete_sql = """DELETE FROM LIGHTRAG_GRAPH_EDGES
+                        WHERE workspace = :workspace AND (source_name = :entity_name OR target_name = :entity_name)"""
+
+            await self.db.execute(
+                delete_sql, {"workspace": self.db.workspace, "entity_name": entity_name}
+            )
+            logger.debug(f"Successfully deleted relations for entity {entity_name}")
+        except Exception as e:
+            logger.error(f"Error deleting relations for entity {entity_name}: {e}")
 
    async def index_done_callback(self) -> None:
        # Ti handles persistence automatically
lightrag/lightrag.py CHANGED
@@ -1449,6 +1449,7 @@ class LightRAG:
        loop = always_get_an_event_loop()
        return loop.run_until_complete(self.adelete_by_entity(entity_name))
 
+    # TODO: Lock all KG-related DBs to ensure consistency across multiple processes
    async def adelete_by_entity(self, entity_name: str) -> None:
        try:
            await self.entities_vdb.delete_entity(entity_name)
@@ -1486,6 +1487,7 @@ class LightRAG:
            self.adelete_by_relation(source_entity, target_entity)
        )
 
+    # TODO: Lock all KG-related DBs to ensure consistency across multiple processes
    async def adelete_by_relation(self, source_entity: str, target_entity: str) -> None:
        """Asynchronously delete a relation between two entities.
 
@@ -1555,6 +1557,7 @@ class LightRAG:
        """
        return await self.doc_status.get_docs_by_status(status)
 
+    # TODO: Lock all KG-related DBs to ensure consistency across multiple processes
    async def adelete_by_doc_id(self, doc_id: str) -> None:
        """Delete a document and all its related data
 
@@ -1907,6 +1910,7 @@ class LightRAG:
        """Synchronous version of aclear_cache."""
        return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))
 
+    # TODO: Lock all KG-related DBs to ensure consistency across multiple processes
    async def aedit_entity(
        self, entity_name: str, updated_data: dict[str, str], allow_rename: bool = True
    ) -> dict[str, Any]:
@@ -2119,6 +2123,7 @@ class LightRAG:
            ]
        )
 
+    # TODO: Lock all KG-related DBs to ensure consistency across multiple processes
    async def aedit_relation(
        self, source_entity: str, target_entity: str, updated_data: dict[str, Any]
    ) -> dict[str, Any]:
@@ -2433,6 +2438,7 @@ class LightRAG:
            self.acreate_relation(source_entity, target_entity, relation_data)
        )
 
+    # TODO: Lock all KG-related DBs to ensure consistency across multiple processes
    async def amerge_entities(
        self,
        source_entities: list[str],
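
The repeated TODOs mark every high-level mutation (adelete_by_entity, adelete_by_doc_id, aedit_entity, amerge_entities, ...) as needing a lock so the vector, KV and graph stores cannot drift apart mid-operation. A rough sketch of the single-process half of that idea using asyncio.Lock and stand-in storage objects; true multi-process consistency would still need the shared KG-storage lock referenced in the storage docstrings:

import asyncio

# one lock guarding every cross-storage knowledge-graph mutation in this process
_kg_write_lock = asyncio.Lock()


async def delete_entity_everywhere(entities_vdb, relationships_vdb, graph, entity_name: str) -> None:
    """Stand-in for the body of adelete_by_entity, serialized behind the lock."""
    async with _kg_write_lock:
        await entities_vdb.delete_entity(entity_name)
        await relationships_vdb.delete_entity_relation(entity_name)
        await graph.delete_node(entity_name)
        # persistence is still deferred to each storage's index_done_callback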