fixed typo
Browse files
- lightrag/lightrag.py +7 -8
lightrag/lightrag.py
CHANGED
|
@@ -493,15 +493,14 @@ class LightRAG:
|
|
| 493 |
}
|
| 494 |
|
| 495 |
# 3. Filter out already processed documents
|
| 496 |
-
new_doc_keys: set[str] = set()
|
| 497 |
# Get docs ids
|
| 498 |
-
|
| 499 |
-
#
|
| 500 |
-
|
| 501 |
-
# Exclude already in
|
| 502 |
-
|
| 503 |
-
# Filter
|
| 504 |
-
new_docs = {doc_id: new_docs[doc_id] for doc_id in
|
| 505 |
|
| 506 |
if not new_docs:
|
| 507 |
logger.info("All documents have been processed or are duplicates")
|
|
|
|
| 493 |
}
|
| 494 |
|
| 495 |
# 3. Filter out already processed documents
|
|
|
|
| 496 |
# Get docs ids
|
| 497 |
+
all_new_doc_ids = set(new_docs.keys())
|
| 498 |
+
# Retrieve IDs that are already being processed
|
| 499 |
+
existing_ids = await self.doc_status.filter_keys(all_new_doc_ids)
|
| 500 |
+
# Exclude IDs of documents that are already in progress
|
| 501 |
+
unique_new_doc_ids = all_new_doc_ids - existing_ids
|
| 502 |
+
# Filter new_docs to only include documents with unique IDs
|
| 503 |
+
new_docs = {doc_id: new_docs[doc_id] for doc_id in unique_new_doc_ids}
|
| 504 |
|
| 505 |
if not new_docs:
|
| 506 |
logger.info("All documents have been processed or are duplicates")
|