ArnoChen
committed on
Commit
·
8104d23
1
Parent(s):
2fab39b
fix doc_key filtering logic to handle dict status
Browse files
- lightrag/lightrag.py +17 -6
lightrag/lightrag.py
CHANGED
|
@@ -372,12 +372,23 @@ class LightRAG:
|
|
| 372 |
|
| 373 |
# 3. Filter out already processed documents
|
| 374 |
# _add_doc_keys = await self.doc_status.filter_keys(list(new_docs.keys()))
|
| 375 |
-
_add_doc_keys =
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 381 |
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
| 382 |
|
| 383 |
if not new_docs:
|
|
|
|
| 372 |
|
| 373 |
# 3. Filter out already processed documents
|
| 374 |
# _add_doc_keys = await self.doc_status.filter_keys(list(new_docs.keys()))
|
| 375 |
+
_add_doc_keys = set()
|
| 376 |
+
for doc_id in new_docs.keys():
|
| 377 |
+
current_doc = await self.doc_status.get_by_id(doc_id)
|
| 378 |
+
|
| 379 |
+
if current_doc is None:
|
| 380 |
+
_add_doc_keys.add(doc_id)
|
| 381 |
+
continue # skip to the next doc_id
|
| 382 |
+
|
| 383 |
+
status = None
|
| 384 |
+
if isinstance(current_doc, dict):
|
| 385 |
+
status = current_doc["status"]
|
| 386 |
+
else:
|
| 387 |
+
status = current_doc.status
|
| 388 |
+
|
| 389 |
+
if status == DocStatus.FAILED:
|
| 390 |
+
_add_doc_keys.add(doc_id)
|
| 391 |
+
|
| 392 |
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
| 393 |
|
| 394 |
if not new_docs:
|