yangdx
committed on
Commit
·
fba7aaa
1
Parent(s):
29ad437
Improved document status retrieval with content fallback.
Browse files
- Added fallback to content_summary when content is missing
- Handled missing fields gracefully
- Made data copy to avoid modification
- Added error logging for missing fields
- Improved code readability and robustness
lightrag/kg/json_doc_status_impl.py
CHANGED
@@ -48,11 +48,20 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|
48 |
self, status: DocStatus
|
49 |
) -> dict[str, DocProcessingStatus]:
|
50 |
"""Get all documents with a specific status"""
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
async def index_done_callback(self) -> None:
|
58 |
write_json(self._data, self._file_name)
|
|
|
48 |
self, status: DocStatus
|
49 |
) -> dict[str, DocProcessingStatus]:
|
50 |
"""Get all documents with a specific status"""
|
51 |
+
result = {}
|
52 |
+
for k, v in self._data.items():
|
53 |
+
if v["status"] == status.value:
|
54 |
+
try:
|
55 |
+
# Make a copy of the data to avoid modifying the original
|
56 |
+
data = v.copy()
|
57 |
+
# If content is missing, use content_summary as content
|
58 |
+
if "content" not in data and "content_summary" in data:
|
59 |
+
data["content"] = data["content_summary"]
|
60 |
+
result[k] = DocProcessingStatus(**data)
|
61 |
+
except KeyError as e:
|
62 |
+
logger.error(f"Missing required field for document {k}: {e}")
|
63 |
+
continue
|
64 |
+
return result
|
65 |
|
66 |
async def index_done_callback(self) -> None:
|
67 |
write_json(self._data, self._file_name)
|