yangdx committed on
Commit
fba7aaa
·
1 Parent(s): 29ad437

Improved document status retrieval with content fallback.

Browse files

- Added fallback to content_summary when content is missing
- Handled missing fields gracefully (the affected document is logged and skipped)
- Copied each record before modifying it, so the stored data is left unchanged
- Added error logging for missing fields
- Improved code readability and robustness

Files changed (1) hide show
  1. lightrag/kg/json_doc_status_impl.py +14 -5
lightrag/kg/json_doc_status_impl.py CHANGED
@@ -48,11 +48,20 @@ class JsonDocStatusStorage(DocStatusStorage):
48
  self, status: DocStatus
49
  ) -> dict[str, DocProcessingStatus]:
50
  """Get all documents with a specific status"""
51
- return {
52
- k: DocProcessingStatus(**v)
53
- for k, v in self._data.items()
54
- if v["status"] == status.value
55
- }
 
 
 
 
 
 
 
 
 
56
 
57
  async def index_done_callback(self) -> None:
58
  write_json(self._data, self._file_name)
 
48
  self, status: DocStatus
49
  ) -> dict[str, DocProcessingStatus]:
50
  """Get all documents with a specific status"""
51
+ result = {}
52
+ for k, v in self._data.items():
53
+ if v["status"] == status.value:
54
+ try:
55
+ # Make a copy of the data to avoid modifying the original
56
+ data = v.copy()
57
+ # If content is missing, use content_summary as content
58
+ if "content" not in data and "content_summary" in data:
59
+ data["content"] = data["content_summary"]
60
+ result[k] = DocProcessingStatus(**data)
61
+ except KeyError as e:
62
+ logger.error(f"Missing required field for document {k}: {e}")
63
+ continue
64
+ return result
65
 
66
  async def index_done_callback(self) -> None:
67
  write_json(self._data, self._file_name)