yangdx
commited on
Commit
·
eaa7c14
1
Parent(s):
fc6211e
Add delete support to all storage implementation
Browse files- lightrag/base.py +51 -9
- lightrag/kg/faiss_impl.py +21 -1
- lightrag/kg/json_doc_status_impl.py +8 -4
- lightrag/kg/json_kv_impl.py +8 -4
- lightrag/kg/nano_vector_db_impl.py +29 -1
- lightrag/kg/networkx_impl.py +30 -1
- lightrag/kg/oracle_impl.py +27 -0
- lightrag/kg/tidb_impl.py +86 -2
- lightrag/lightrag.py +6 -0
lightrag/base.py
CHANGED
@@ -153,15 +153,33 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
|
153 |
|
154 |
@abstractmethod
|
155 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
156 |
-
"""Insert or update vectors in the storage.
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
@abstractmethod
|
159 |
async def delete_entity(self, entity_name: str) -> None:
|
160 |
-
"""Delete a single entity by its name.
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
@abstractmethod
|
163 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
164 |
-
"""Delete relations for a given entity.
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
@abstractmethod
|
167 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
@@ -187,6 +205,19 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
|
187 |
"""
|
188 |
pass
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
|
191 |
@dataclass
|
192 |
class BaseKVStorage(StorageNameSpace, ABC):
|
@@ -206,16 +237,20 @@ class BaseKVStorage(StorageNameSpace, ABC):
|
|
206 |
|
207 |
@abstractmethod
|
208 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
209 |
-
"""Upsert data
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
@abstractmethod
|
212 |
async def delete(self, ids: list[str]) -> None:
|
213 |
"""Delete specific records from storage by their IDs
|
214 |
|
215 |
-
|
216 |
-
1.
|
217 |
-
2.
|
218 |
-
3. For in-memory DB, changes will be persisted to disk during the next index_done_callback
|
219 |
|
220 |
Args:
|
221 |
ids (list[str]): List of document IDs to be deleted from storage
|
@@ -267,7 +302,14 @@ class BaseGraphStorage(StorageNameSpace, ABC):
|
|
267 |
async def upsert_edge(
|
268 |
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
|
269 |
) -> None:
|
270 |
-
"""Delete a node from the graph.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
|
272 |
@abstractmethod
|
273 |
async def delete_node(self, node_id: str) -> None:
|
|
|
153 |
|
154 |
@abstractmethod
|
155 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
156 |
+
"""Insert or update vectors in the storage.
|
157 |
+
|
158 |
+
Importance notes for in-memory storage:
|
159 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
160 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
161 |
+
KG-storage-log should be used to avoid data corruption
|
162 |
+
"""
|
163 |
|
164 |
@abstractmethod
|
165 |
async def delete_entity(self, entity_name: str) -> None:
|
166 |
+
"""Delete a single entity by its name.
|
167 |
+
|
168 |
+
Importance notes for in-memory storage:
|
169 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
170 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
171 |
+
KG-storage-log should be used to avoid data corruption
|
172 |
+
"""
|
173 |
|
174 |
@abstractmethod
|
175 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
176 |
+
"""Delete relations for a given entity.
|
177 |
+
|
178 |
+
Importance notes for in-memory storage:
|
179 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
180 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
181 |
+
KG-storage-log should be used to avoid data corruption
|
182 |
+
"""
|
183 |
|
184 |
@abstractmethod
|
185 |
async def get_by_id(self, id: str) -> dict[str, Any] | None:
|
|
|
205 |
"""
|
206 |
pass
|
207 |
|
208 |
+
@abstractmethod
|
209 |
+
async def delete(self, ids: list[str]):
|
210 |
+
"""Delete vectors with specified IDs
|
211 |
+
|
212 |
+
Importance notes for in-memory storage:
|
213 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
214 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
215 |
+
KG-storage-log should be used to avoid data corruption
|
216 |
+
|
217 |
+
Args:
|
218 |
+
ids: List of vector IDs to be deleted
|
219 |
+
"""
|
220 |
+
|
221 |
|
222 |
@dataclass
|
223 |
class BaseKVStorage(StorageNameSpace, ABC):
|
|
|
237 |
|
238 |
@abstractmethod
|
239 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
240 |
+
"""Upsert data
|
241 |
+
|
242 |
+
Importance notes for in-memory storage:
|
243 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
244 |
+
2. update flags to notify other processes that data persistence is needed
|
245 |
+
"""
|
246 |
|
247 |
@abstractmethod
|
248 |
async def delete(self, ids: list[str]) -> None:
|
249 |
"""Delete specific records from storage by their IDs
|
250 |
|
251 |
+
Importance notes for in-memory storage:
|
252 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
253 |
+
2. update flags to notify other processes that data persistence is needed
|
|
|
254 |
|
255 |
Args:
|
256 |
ids (list[str]): List of document IDs to be deleted from storage
|
|
|
302 |
async def upsert_edge(
|
303 |
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
|
304 |
) -> None:
|
305 |
+
"""Delete a node from the graph.
|
306 |
+
|
307 |
+
Importance notes for in-memory storage:
|
308 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
309 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
310 |
+
KG-storage-log should be used to avoid data corruption
|
311 |
+
"""
|
312 |
+
|
313 |
|
314 |
@abstractmethod
|
315 |
async def delete_node(self, node_id: str) -> None:
|
lightrag/kg/faiss_impl.py
CHANGED
@@ -217,6 +217,11 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
217 |
async def delete(self, ids: list[str]):
|
218 |
"""
|
219 |
Delete vectors for the provided custom IDs.
|
|
|
|
|
|
|
|
|
|
|
220 |
"""
|
221 |
logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
|
222 |
to_remove = []
|
@@ -232,13 +237,22 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
232 |
)
|
233 |
|
234 |
async def delete_entity(self, entity_name: str) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
236 |
logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
|
237 |
await self.delete([entity_id])
|
238 |
|
239 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
240 |
"""
|
241 |
-
|
|
|
|
|
|
|
242 |
"""
|
243 |
logger.debug(f"Searching relations for entity {entity_name}")
|
244 |
relations = []
|
@@ -433,6 +447,12 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|
433 |
async def drop(self) -> dict[str, str]:
|
434 |
"""Drop all vector data from storage and clean up resources
|
435 |
|
|
|
|
|
|
|
|
|
|
|
|
|
436 |
This method will remove all vectors from the Faiss index and delete the storage files.
|
437 |
|
438 |
Returns:
|
|
|
217 |
async def delete(self, ids: list[str]):
|
218 |
"""
|
219 |
Delete vectors for the provided custom IDs.
|
220 |
+
|
221 |
+
Importance notes:
|
222 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
223 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
224 |
+
KG-storage-log should be used to avoid data corruption
|
225 |
"""
|
226 |
logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
|
227 |
to_remove = []
|
|
|
237 |
)
|
238 |
|
239 |
async def delete_entity(self, entity_name: str) -> None:
|
240 |
+
"""
|
241 |
+
Importance notes:
|
242 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
243 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
244 |
+
KG-storage-log should be used to avoid data corruption
|
245 |
+
"""
|
246 |
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
247 |
logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
|
248 |
await self.delete([entity_id])
|
249 |
|
250 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
251 |
"""
|
252 |
+
Importance notes:
|
253 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
254 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
255 |
+
KG-storage-log should be used to avoid data corruption
|
256 |
"""
|
257 |
logger.debug(f"Searching relations for entity {entity_name}")
|
258 |
relations = []
|
|
|
447 |
async def drop(self) -> dict[str, str]:
|
448 |
"""Drop all vector data from storage and clean up resources
|
449 |
|
450 |
+
This method will:
|
451 |
+
1. Remove the vector database storage file if it exists
|
452 |
+
2. Reinitialize the vector database client
|
453 |
+
3. Update flags to notify other processes
|
454 |
+
4. Changes is persisted to disk immediately
|
455 |
+
|
456 |
This method will remove all vectors from the Faiss index and delete the storage files.
|
457 |
|
458 |
Returns:
|
lightrag/kg/json_doc_status_impl.py
CHANGED
@@ -109,6 +109,11 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|
109 |
await clear_all_update_flags(self.namespace)
|
110 |
|
111 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
|
|
|
|
|
|
|
|
|
|
112 |
if not data:
|
113 |
return
|
114 |
logger.info(f"Inserting {len(data)} records to {self.namespace}")
|
@@ -125,10 +130,9 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|
125 |
async def delete(self, doc_ids: list[str]):
|
126 |
"""Delete specific records from storage by their IDs
|
127 |
|
128 |
-
|
129 |
-
1.
|
130 |
-
2.
|
131 |
-
3. The changes will be persisted to disk during the next index_done_callback
|
132 |
|
133 |
Args:
|
134 |
ids (list[str]): List of document IDs to be deleted from storage
|
|
|
109 |
await clear_all_update_flags(self.namespace)
|
110 |
|
111 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
112 |
+
"""
|
113 |
+
Importance notes for in-memory storage:
|
114 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
115 |
+
2. update flags to notify other processes that data persistence is needed
|
116 |
+
"""
|
117 |
if not data:
|
118 |
return
|
119 |
logger.info(f"Inserting {len(data)} records to {self.namespace}")
|
|
|
130 |
async def delete(self, doc_ids: list[str]):
|
131 |
"""Delete specific records from storage by their IDs
|
132 |
|
133 |
+
Importance notes for in-memory storage:
|
134 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
135 |
+
2. update flags to notify other processes that data persistence is needed
|
|
|
136 |
|
137 |
Args:
|
138 |
ids (list[str]): List of document IDs to be deleted from storage
|
lightrag/kg/json_kv_impl.py
CHANGED
@@ -114,6 +114,11 @@ class JsonKVStorage(BaseKVStorage):
|
|
114 |
return set(keys) - set(self._data.keys())
|
115 |
|
116 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
|
|
|
|
|
|
|
|
|
|
117 |
if not data:
|
118 |
return
|
119 |
logger.info(f"Inserting {len(data)} records to {self.namespace}")
|
@@ -124,10 +129,9 @@ class JsonKVStorage(BaseKVStorage):
|
|
124 |
async def delete(self, ids: list[str]) -> None:
|
125 |
"""Delete specific records from storage by their IDs
|
126 |
|
127 |
-
|
128 |
-
1.
|
129 |
-
2.
|
130 |
-
3. The changes will be persisted to disk during the next index_done_callback
|
131 |
|
132 |
Args:
|
133 |
ids (list[str]): List of document IDs to be deleted from storage
|
|
|
114 |
return set(keys) - set(self._data.keys())
|
115 |
|
116 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
117 |
+
"""
|
118 |
+
Importance notes for in-memory storage:
|
119 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
120 |
+
2. update flags to notify other processes that data persistence is needed
|
121 |
+
"""
|
122 |
if not data:
|
123 |
return
|
124 |
logger.info(f"Inserting {len(data)} records to {self.namespace}")
|
|
|
129 |
async def delete(self, ids: list[str]) -> None:
|
130 |
"""Delete specific records from storage by their IDs
|
131 |
|
132 |
+
Importance notes for in-memory storage:
|
133 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
134 |
+
2. update flags to notify other processes that data persistence is needed
|
|
|
135 |
|
136 |
Args:
|
137 |
ids (list[str]): List of document IDs to be deleted from storage
|
lightrag/kg/nano_vector_db_impl.py
CHANGED
@@ -78,6 +78,13 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
78 |
return self._client
|
79 |
|
80 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
logger.info(f"Inserting {len(data)} to {self.namespace}")
|
82 |
if not data:
|
83 |
return
|
@@ -146,6 +153,11 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
146 |
async def delete(self, ids: list[str]):
|
147 |
"""Delete vectors with specified IDs
|
148 |
|
|
|
|
|
|
|
|
|
|
|
149 |
Args:
|
150 |
ids: List of vector IDs to be deleted
|
151 |
"""
|
@@ -159,6 +171,13 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
159 |
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
160 |
|
161 |
async def delete_entity(self, entity_name: str) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
try:
|
163 |
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
164 |
logger.debug(
|
@@ -176,6 +195,13 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
176 |
logger.error(f"Error deleting entity {entity_name}: {e}")
|
177 |
|
178 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
try:
|
180 |
client = await self._get_client()
|
181 |
storage = getattr(client, "_NanoVectorDB__storage")
|
@@ -288,7 +314,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|
288 |
1. Remove the vector database storage file if it exists
|
289 |
2. Reinitialize the vector database client
|
290 |
3. Update flags to notify other processes
|
291 |
-
4.
|
|
|
|
|
292 |
|
293 |
Returns:
|
294 |
dict[str, str]: Operation status and message
|
|
|
78 |
return self._client
|
79 |
|
80 |
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
|
81 |
+
"""
|
82 |
+
Importance notes:
|
83 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
84 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
85 |
+
KG-storage-log should be used to avoid data corruption
|
86 |
+
"""
|
87 |
+
|
88 |
logger.info(f"Inserting {len(data)} to {self.namespace}")
|
89 |
if not data:
|
90 |
return
|
|
|
153 |
async def delete(self, ids: list[str]):
|
154 |
"""Delete vectors with specified IDs
|
155 |
|
156 |
+
Importance notes:
|
157 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
158 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
159 |
+
KG-storage-log should be used to avoid data corruption
|
160 |
+
|
161 |
Args:
|
162 |
ids: List of vector IDs to be deleted
|
163 |
"""
|
|
|
171 |
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
172 |
|
173 |
async def delete_entity(self, entity_name: str) -> None:
|
174 |
+
"""
|
175 |
+
Importance notes:
|
176 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
177 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
178 |
+
KG-storage-log should be used to avoid data corruption
|
179 |
+
"""
|
180 |
+
|
181 |
try:
|
182 |
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
183 |
logger.debug(
|
|
|
195 |
logger.error(f"Error deleting entity {entity_name}: {e}")
|
196 |
|
197 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
198 |
+
"""
|
199 |
+
Importance notes:
|
200 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
201 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
202 |
+
KG-storage-log should be used to avoid data corruption
|
203 |
+
"""
|
204 |
+
|
205 |
try:
|
206 |
client = await self._get_client()
|
207 |
storage = getattr(client, "_NanoVectorDB__storage")
|
|
|
314 |
1. Remove the vector database storage file if it exists
|
315 |
2. Reinitialize the vector database client
|
316 |
3. Update flags to notify other processes
|
317 |
+
4. Changes is persisted to disk immediately
|
318 |
+
|
319 |
+
This method is intended for use in scenarios where all data needs to be removed,
|
320 |
|
321 |
Returns:
|
322 |
dict[str, str]: Operation status and message
|
lightrag/kg/networkx_impl.py
CHANGED
@@ -156,16 +156,34 @@ class NetworkXStorage(BaseGraphStorage):
|
|
156 |
return None
|
157 |
|
158 |
async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
graph = await self._get_graph()
|
160 |
graph.add_node(node_id, **node_data)
|
161 |
|
162 |
async def upsert_edge(
|
163 |
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
|
164 |
) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
graph = await self._get_graph()
|
166 |
graph.add_edge(source_node_id, target_node_id, **edge_data)
|
167 |
|
168 |
async def delete_node(self, node_id: str) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
graph = await self._get_graph()
|
170 |
if graph.has_node(node_id):
|
171 |
graph.remove_node(node_id)
|
@@ -173,6 +191,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
173 |
else:
|
174 |
logger.warning(f"Node {node_id} not found in the graph for deletion.")
|
175 |
|
|
|
176 |
async def embed_nodes(
|
177 |
self, algorithm: str
|
178 |
) -> tuple[np.ndarray[Any, Any], list[str]]:
|
@@ -193,6 +212,11 @@ class NetworkXStorage(BaseGraphStorage):
|
|
193 |
async def remove_nodes(self, nodes: list[str]):
|
194 |
"""Delete multiple nodes
|
195 |
|
|
|
|
|
|
|
|
|
|
|
196 |
Args:
|
197 |
nodes: List of node IDs to be deleted
|
198 |
"""
|
@@ -204,6 +228,11 @@ class NetworkXStorage(BaseGraphStorage):
|
|
204 |
async def remove_edges(self, edges: list[tuple[str, str]]):
|
205 |
"""Delete multiple edges
|
206 |
|
|
|
|
|
|
|
|
|
|
|
207 |
Args:
|
208 |
edges: List of edges to be deleted, each edge is a (source, target) tuple
|
209 |
"""
|
@@ -433,7 +462,7 @@ class NetworkXStorage(BaseGraphStorage):
|
|
433 |
1. Remove the graph storage file if it exists
|
434 |
2. Reset the graph to an empty state
|
435 |
3. Update flags to notify other processes
|
436 |
-
4.
|
437 |
|
438 |
Returns:
|
439 |
dict[str, str]: Operation status and message
|
|
|
156 |
return None
|
157 |
|
158 |
async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
|
159 |
+
"""
|
160 |
+
Importance notes:
|
161 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
162 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
163 |
+
KG-storage-log should be used to avoid data corruption
|
164 |
+
"""
|
165 |
graph = await self._get_graph()
|
166 |
graph.add_node(node_id, **node_data)
|
167 |
|
168 |
async def upsert_edge(
|
169 |
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
|
170 |
) -> None:
|
171 |
+
"""
|
172 |
+
Importance notes:
|
173 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
174 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
175 |
+
KG-storage-log should be used to avoid data corruption
|
176 |
+
"""
|
177 |
graph = await self._get_graph()
|
178 |
graph.add_edge(source_node_id, target_node_id, **edge_data)
|
179 |
|
180 |
async def delete_node(self, node_id: str) -> None:
|
181 |
+
"""
|
182 |
+
Importance notes:
|
183 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
184 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
185 |
+
KG-storage-log should be used to avoid data corruption
|
186 |
+
"""
|
187 |
graph = await self._get_graph()
|
188 |
if graph.has_node(node_id):
|
189 |
graph.remove_node(node_id)
|
|
|
191 |
else:
|
192 |
logger.warning(f"Node {node_id} not found in the graph for deletion.")
|
193 |
|
194 |
+
# TODO: NOT USED
|
195 |
async def embed_nodes(
|
196 |
self, algorithm: str
|
197 |
) -> tuple[np.ndarray[Any, Any], list[str]]:
|
|
|
212 |
async def remove_nodes(self, nodes: list[str]):
|
213 |
"""Delete multiple nodes
|
214 |
|
215 |
+
Importance notes:
|
216 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
217 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
218 |
+
KG-storage-log should be used to avoid data corruption
|
219 |
+
|
220 |
Args:
|
221 |
nodes: List of node IDs to be deleted
|
222 |
"""
|
|
|
228 |
async def remove_edges(self, edges: list[tuple[str, str]]):
|
229 |
"""Delete multiple edges
|
230 |
|
231 |
+
Importance notes:
|
232 |
+
1. Changes will be persisted to disk during the next index_done_callback
|
233 |
+
2. Only one process should updating the storage at a time before index_done_callback,
|
234 |
+
KG-storage-log should be used to avoid data corruption
|
235 |
+
|
236 |
Args:
|
237 |
edges: List of edges to be deleted, each edge is a (source, target) tuple
|
238 |
"""
|
|
|
462 |
1. Remove the graph storage file if it exists
|
463 |
2. Reset the graph to an empty state
|
464 |
3. Update flags to notify other processes
|
465 |
+
4. Changes is persisted to disk immediately
|
466 |
|
467 |
Returns:
|
468 |
dict[str, str]: Operation status and message
|
lightrag/kg/oracle_impl.py
CHANGED
@@ -392,6 +392,33 @@ class OracleKVStorage(BaseKVStorage):
|
|
392 |
# Oracle handles persistence automatically
|
393 |
pass
|
394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
async def drop(self) -> dict[str, str]:
|
396 |
"""Drop the storage"""
|
397 |
try:
|
|
|
392 |
# Oracle handles persistence automatically
|
393 |
pass
|
394 |
|
395 |
+
async def delete(self, ids: list[str]) -> dict[str, str]:
|
396 |
+
"""Delete records with specified IDs from the storage.
|
397 |
+
|
398 |
+
Args:
|
399 |
+
ids: List of record IDs to be deleted
|
400 |
+
|
401 |
+
Returns:
|
402 |
+
Dictionary with status and message
|
403 |
+
"""
|
404 |
+
if not ids:
|
405 |
+
return {"status": "success", "message": "No IDs provided for deletion"}
|
406 |
+
|
407 |
+
try:
|
408 |
+
table_name = namespace_to_table_name(self.namespace)
|
409 |
+
if not table_name:
|
410 |
+
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
|
411 |
+
|
412 |
+
ids_list = ",".join([f"'{id}'" for id in ids])
|
413 |
+
delete_sql = f"DELETE FROM {table_name} WHERE workspace=:workspace AND id IN ({ids_list})"
|
414 |
+
|
415 |
+
await self.db.execute(delete_sql, {"workspace": self.db.workspace})
|
416 |
+
logger.info(f"Successfully deleted {len(ids)} records from {self.namespace}")
|
417 |
+
return {"status": "success", "message": f"Successfully deleted {len(ids)} records"}
|
418 |
+
except Exception as e:
|
419 |
+
logger.error(f"Error deleting records from {self.namespace}: {e}")
|
420 |
+
return {"status": "error", "message": str(e)}
|
421 |
+
|
422 |
async def drop(self) -> dict[str, str]:
|
423 |
"""Drop the storage"""
|
424 |
try:
|
lightrag/kg/tidb_impl.py
CHANGED
@@ -278,6 +278,35 @@ class TiDBKVStorage(BaseKVStorage):
|
|
278 |
# Ti handles persistence automatically
|
279 |
pass
|
280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
async def drop(self) -> dict[str, str]:
|
282 |
"""Drop the storage"""
|
283 |
try:
|
@@ -421,11 +450,66 @@ class TiDBVectorDBStorage(BaseVectorStorage):
|
|
421 |
params = {"workspace": self.db.workspace, "status": status}
|
422 |
return await self.db.query(SQL, params, multirows=True)
|
423 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
async def delete_entity(self, entity_name: str) -> None:
|
425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
426 |
|
427 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
428 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
429 |
|
430 |
async def index_done_callback(self) -> None:
|
431 |
# Ti handles persistence automatically
|
|
|
278 |
# Ti handles persistence automatically
|
279 |
pass
|
280 |
|
281 |
+
async def delete(self, ids: list[str]) -> dict[str, str]:
|
282 |
+
"""Delete records with specified IDs from the storage.
|
283 |
+
|
284 |
+
Args:
|
285 |
+
ids: List of record IDs to be deleted
|
286 |
+
|
287 |
+
Returns:
|
288 |
+
Dictionary with status and message
|
289 |
+
"""
|
290 |
+
if not ids:
|
291 |
+
return {"status": "success", "message": "No IDs provided for deletion"}
|
292 |
+
|
293 |
+
try:
|
294 |
+
table_name = namespace_to_table_name(self.namespace)
|
295 |
+
id_field = namespace_to_id(self.namespace)
|
296 |
+
|
297 |
+
if not table_name or not id_field:
|
298 |
+
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
|
299 |
+
|
300 |
+
ids_list = ",".join([f"'{id}'" for id in ids])
|
301 |
+
delete_sql = f"DELETE FROM {table_name} WHERE workspace = :workspace AND {id_field} IN ({ids_list})"
|
302 |
+
|
303 |
+
await self.db.execute(delete_sql, {"workspace": self.db.workspace})
|
304 |
+
logger.info(f"Successfully deleted {len(ids)} records from {self.namespace}")
|
305 |
+
return {"status": "success", "message": f"Successfully deleted {len(ids)} records"}
|
306 |
+
except Exception as e:
|
307 |
+
logger.error(f"Error deleting records from {self.namespace}: {e}")
|
308 |
+
return {"status": "error", "message": str(e)}
|
309 |
+
|
310 |
async def drop(self) -> dict[str, str]:
|
311 |
"""Drop the storage"""
|
312 |
try:
|
|
|
450 |
params = {"workspace": self.db.workspace, "status": status}
|
451 |
return await self.db.query(SQL, params, multirows=True)
|
452 |
|
453 |
+
async def delete(self, ids: list[str]) -> None:
|
454 |
+
"""Delete vectors with specified IDs from the storage.
|
455 |
+
|
456 |
+
Args:
|
457 |
+
ids: List of vector IDs to be deleted
|
458 |
+
"""
|
459 |
+
if not ids:
|
460 |
+
return
|
461 |
+
|
462 |
+
table_name = namespace_to_table_name(self.namespace)
|
463 |
+
id_field = namespace_to_id(self.namespace)
|
464 |
+
|
465 |
+
if not table_name or not id_field:
|
466 |
+
logger.error(f"Unknown namespace for vector deletion: {self.namespace}")
|
467 |
+
return
|
468 |
+
|
469 |
+
ids_list = ",".join([f"'{id}'" for id in ids])
|
470 |
+
delete_sql = f"DELETE FROM {table_name} WHERE workspace = :workspace AND {id_field} IN ({ids_list})"
|
471 |
+
|
472 |
+
try:
|
473 |
+
await self.db.execute(delete_sql, {"workspace": self.db.workspace})
|
474 |
+
logger.debug(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
|
475 |
+
except Exception as e:
|
476 |
+
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
477 |
+
|
478 |
async def delete_entity(self, entity_name: str) -> None:
|
479 |
+
"""Delete an entity by its name from the vector storage.
|
480 |
+
|
481 |
+
Args:
|
482 |
+
entity_name: The name of the entity to delete
|
483 |
+
"""
|
484 |
+
try:
|
485 |
+
# Construct SQL to delete the entity
|
486 |
+
delete_sql = """DELETE FROM LIGHTRAG_GRAPH_NODES
|
487 |
+
WHERE workspace = :workspace AND name = :entity_name"""
|
488 |
+
|
489 |
+
await self.db.execute(
|
490 |
+
delete_sql, {"workspace": self.db.workspace, "entity_name": entity_name}
|
491 |
+
)
|
492 |
+
logger.debug(f"Successfully deleted entity {entity_name}")
|
493 |
+
except Exception as e:
|
494 |
+
logger.error(f"Error deleting entity {entity_name}: {e}")
|
495 |
|
496 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
497 |
+
"""Delete all relations associated with an entity.
|
498 |
+
|
499 |
+
Args:
|
500 |
+
entity_name: The name of the entity whose relations should be deleted
|
501 |
+
"""
|
502 |
+
try:
|
503 |
+
# Delete relations where the entity is either the source or target
|
504 |
+
delete_sql = """DELETE FROM LIGHTRAG_GRAPH_EDGES
|
505 |
+
WHERE workspace = :workspace AND (source_name = :entity_name OR target_name = :entity_name)"""
|
506 |
+
|
507 |
+
await self.db.execute(
|
508 |
+
delete_sql, {"workspace": self.db.workspace, "entity_name": entity_name}
|
509 |
+
)
|
510 |
+
logger.debug(f"Successfully deleted relations for entity {entity_name}")
|
511 |
+
except Exception as e:
|
512 |
+
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
513 |
|
514 |
async def index_done_callback(self) -> None:
|
515 |
# Ti handles persistence automatically
|
lightrag/lightrag.py
CHANGED
@@ -1449,6 +1449,7 @@ class LightRAG:
|
|
1449 |
loop = always_get_an_event_loop()
|
1450 |
return loop.run_until_complete(self.adelete_by_entity(entity_name))
|
1451 |
|
|
|
1452 |
async def adelete_by_entity(self, entity_name: str) -> None:
|
1453 |
try:
|
1454 |
await self.entities_vdb.delete_entity(entity_name)
|
@@ -1486,6 +1487,7 @@ class LightRAG:
|
|
1486 |
self.adelete_by_relation(source_entity, target_entity)
|
1487 |
)
|
1488 |
|
|
|
1489 |
async def adelete_by_relation(self, source_entity: str, target_entity: str) -> None:
|
1490 |
"""Asynchronously delete a relation between two entities.
|
1491 |
|
@@ -1555,6 +1557,7 @@ class LightRAG:
|
|
1555 |
"""
|
1556 |
return await self.doc_status.get_docs_by_status(status)
|
1557 |
|
|
|
1558 |
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
1559 |
"""Delete a document and all its related data
|
1560 |
|
@@ -1907,6 +1910,7 @@ class LightRAG:
|
|
1907 |
"""Synchronous version of aclear_cache."""
|
1908 |
return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))
|
1909 |
|
|
|
1910 |
async def aedit_entity(
|
1911 |
self, entity_name: str, updated_data: dict[str, str], allow_rename: bool = True
|
1912 |
) -> dict[str, Any]:
|
@@ -2119,6 +2123,7 @@ class LightRAG:
|
|
2119 |
]
|
2120 |
)
|
2121 |
|
|
|
2122 |
async def aedit_relation(
|
2123 |
self, source_entity: str, target_entity: str, updated_data: dict[str, Any]
|
2124 |
) -> dict[str, Any]:
|
@@ -2433,6 +2438,7 @@ class LightRAG:
|
|
2433 |
self.acreate_relation(source_entity, target_entity, relation_data)
|
2434 |
)
|
2435 |
|
|
|
2436 |
async def amerge_entities(
|
2437 |
self,
|
2438 |
source_entities: list[str],
|
|
|
1449 |
loop = always_get_an_event_loop()
|
1450 |
return loop.run_until_complete(self.adelete_by_entity(entity_name))
|
1451 |
|
1452 |
+
# TODO: Lock all KG relative DB to esure consistency across multiple processes
|
1453 |
async def adelete_by_entity(self, entity_name: str) -> None:
|
1454 |
try:
|
1455 |
await self.entities_vdb.delete_entity(entity_name)
|
|
|
1487 |
self.adelete_by_relation(source_entity, target_entity)
|
1488 |
)
|
1489 |
|
1490 |
+
# TODO: Lock all KG relative DB to esure consistency across multiple processes
|
1491 |
async def adelete_by_relation(self, source_entity: str, target_entity: str) -> None:
|
1492 |
"""Asynchronously delete a relation between two entities.
|
1493 |
|
|
|
1557 |
"""
|
1558 |
return await self.doc_status.get_docs_by_status(status)
|
1559 |
|
1560 |
+
# TODO: Lock all KG relative DB to esure consistency across multiple processes
|
1561 |
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
1562 |
"""Delete a document and all its related data
|
1563 |
|
|
|
1910 |
"""Synchronous version of aclear_cache."""
|
1911 |
return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))
|
1912 |
|
1913 |
+
# TODO: Lock all KG relative DB to esure consistency across multiple processes
|
1914 |
async def aedit_entity(
|
1915 |
self, entity_name: str, updated_data: dict[str, str], allow_rename: bool = True
|
1916 |
) -> dict[str, Any]:
|
|
|
2123 |
]
|
2124 |
)
|
2125 |
|
2126 |
+
# TODO: Lock all KG relative DB to esure consistency across multiple processes
|
2127 |
async def aedit_relation(
|
2128 |
self, source_entity: str, target_entity: str, updated_data: dict[str, Any]
|
2129 |
) -> dict[str, Any]:
|
|
|
2438 |
self.acreate_relation(source_entity, target_entity, relation_data)
|
2439 |
)
|
2440 |
|
2441 |
+
# TODO: Lock all KG relative DB to esure consistency across multiple processes
|
2442 |
async def amerge_entities(
|
2443 |
self,
|
2444 |
source_entities: list[str],
|