direct output: remove cache
app/nodes.py CHANGED (+30, -29)
@@ -58,13 +58,14 @@ def detect_file_type_node(state: GraphState) -> GraphState:
         # Check if we've already shown direct output for this exact file
         file_hash = compute_file_hash(state["file_content"])
 
-        if file_hash in _direct_output_cache:
-            logger.info(f"Direct output file already processed (hash: {file_hash[:8]}...) - will route to standard RAG for follow-up")
-            workflow_type = "standard"  # Override to standard for follow-up queries
-            is_cached_direct_output = True
-        else:
-            logger.info(f"Direct output mode enabled - new file will show ingestor results directly")
-            workflow_type = "direct_output"
+        # Comment out the cache check:
+        # if file_hash in _direct_output_cache:
+        #     logger.info(f"Direct output file already processed (hash: {file_hash[:8]}...) - will route to standard RAG for follow-up")
+        #     workflow_type = "standard"  # Override to standard for follow-up queries
+        #     is_cached_direct_output = True
+        # else:
+        logger.info(f"Direct output mode enabled - new file will show ingestor results directly")
+        workflow_type = "direct_output"
     else:
         # Direct output disabled - use standard workflow
         logger.info(f"Direct output mode disabled - using standard RAG pipeline")
@@ -100,20 +101,20 @@ def ingest_node(state: GraphState) -> GraphState:
     file_hash = state.get("file_hash")
 
     # Check cache ONLY if direct output is enabled and file was previously processed
-    if DIRECT_OUTPUT_ENABLED and file_hash and file_hash in _direct_output_cache:
-        cached_data = _direct_output_cache[file_hash]
-        logger.info(f"Using cached result for direct output file: {state['filename']}")
-
-        metadata = state.get("metadata", {})
-        metadata.update({
-            "ingestion_duration": 0,
-            "ingestor_context_length": len(cached_data["ingestor_context"]),
-            "ingestion_success": True,
-            "cached": True,
-            "cache_timestamp": cached_data["timestamp"]
-        })
-
-        return {"ingestor_context": cached_data["ingestor_context"], "metadata": metadata}
+    # if DIRECT_OUTPUT_ENABLED and file_hash and file_hash in _direct_output_cache:
+    #     cached_data = _direct_output_cache[file_hash]
+    #     logger.info(f"Using cached result for direct output file: {state['filename']}")
+
+    #     metadata = state.get("metadata", {})
+    #     metadata.update({
+    #         "ingestion_duration": 0,
+    #         "ingestor_context_length": len(cached_data["ingestor_context"]),
+    #         "ingestion_success": True,
+    #         "cached": True,
+    #         "cache_timestamp": cached_data["timestamp"]
+    #     })
+
+    #     return {"ingestor_context": cached_data["ingestor_context"], "metadata": metadata}
 
     # Standard processing (both for new direct output files and all standard files)
     logger.info(f"Ingesting {file_type} file: {state['filename']}")
@@ -139,14 +140,14 @@ def ingest_node(state: GraphState) -> GraphState:
         os.unlink(tmp_file_path)
 
     # Cache ONLY if direct output mode is enabled
-    if DIRECT_OUTPUT_ENABLED and file_hash:
-        _direct_output_cache[file_hash] = {
-            "ingestor_context": ingestor_context,
-            "timestamp": datetime.now().isoformat(),
-            "filename": state["filename"],
-            "file_type": file_type
-        }
-        logger.info(f"Cached direct output result for file hash: {file_hash[:8]}...")
+    # if DIRECT_OUTPUT_ENABLED and file_hash:
+    #     _direct_output_cache[file_hash] = {
+    #         "ingestor_context": ingestor_context,
+    #         "timestamp": datetime.now().isoformat(),
+    #         "filename": state["filename"],
+    #         "file_type": file_type
+    #     }
+    #     logger.info(f"Cached direct output result for file hash: {file_hash[:8]}...")
 
     duration = (datetime.now() - start_time).total_seconds()
     metadata = state.get("metadata", {})