Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
ingestor context handling
Browse files- utils/sources.py +25 -8
utils/sources.py
CHANGED
|
@@ -122,15 +122,32 @@ def _process_context(context: Union[str, List[Dict[str, Any]]]) -> tuple[str, Li
|
|
| 122 |
if isinstance(result, str):
|
| 123 |
result = ast.literal_eval(result)
|
| 124 |
|
|
|
|
|
|
|
| 125 |
metadata = result.get('answer_metadata', {})
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
processed_results.append(doc_info)
|
| 135 |
|
| 136 |
# Format context string - SIMPLIFIED TO ONLY USE [1], [2], [3]
|
|
|
|
| 122 |
if isinstance(result, str):
|
| 123 |
result = ast.literal_eval(result)
|
| 124 |
|
| 125 |
+
# Handle both ingested files (metadata at top level) and retrieved documents (metadata in answer_metadata)
|
| 126 |
+
# Check if metadata is nested in 'answer_metadata' (retrieved documents)
|
| 127 |
metadata = result.get('answer_metadata', {})
|
| 128 |
+
|
| 129 |
+
# If answer_metadata is empty, missing, or contains only None/'Unknown' values, fall back to the top level (ingested files)
|
| 130 |
+
if not metadata or all(v is None or v == 'Unknown' for v in metadata.values()):
|
| 131 |
+
# For ingested files, metadata is at the top level
|
| 132 |
+
doc_info = {
|
| 133 |
+
'answer': result.get('answer', result.get('content', '')),
|
| 134 |
+
'filename': result.get('filename', 'Unknown'),
|
| 135 |
+
'page': result.get('page', 'Unknown'),
|
| 136 |
+
'year': result.get('year', 'Unknown'),
|
| 137 |
+
'source': result.get('source', 'Unknown'),
|
| 138 |
+
'document_id': result.get('_id', result.get('document_id', 'Unknown'))
|
| 139 |
+
}
|
| 140 |
+
else:
|
| 141 |
+
# For retrieved documents, use nested metadata
|
| 142 |
+
doc_info = {
|
| 143 |
+
'answer': result.get('answer', ''),
|
| 144 |
+
'filename': metadata.get('filename', 'Unknown'),
|
| 145 |
+
'page': metadata.get('page', 'Unknown'),
|
| 146 |
+
'year': metadata.get('year', 'Unknown'),
|
| 147 |
+
'source': metadata.get('source', 'Unknown'),
|
| 148 |
+
'document_id': metadata.get('_id', 'Unknown')
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
processed_results.append(doc_info)
|
| 152 |
|
| 153 |
# Format context string - SIMPLIFIED TO ONLY USE [1], [2], [3]
|