Spaces:
Running
Running
Commit
·
b50fffd
1
Parent(s):
2f0030b
refactor: address final CodeRabbit feedback
Browse files
- Fix Windows path escaping in verify_sandbox.py using .as_posix()
- Improve verdict detection with robust regex matching (avoid false positives)
- Wire allow_network parameter to Modal sandbox block_network
- Add finally block for sandbox resource cleanup
- Use lru_cache for thread-safe singleton pattern
- Add OpenAI-only note to LlamaIndexRAGService docstring
examples/modal_demo/verify_sandbox.py
CHANGED
|
@@ -56,10 +56,10 @@ def test_2_file_system_isolation():
|
|
| 56 |
print(f"Local file exists: {local_file}")
|
| 57 |
print(f"Can read locally: {local_file.exists()}")
|
| 58 |
|
| 59 |
-
# Try to access it from sandbox
|
| 60 |
code = f"""
|
| 61 |
from pathlib import Path
|
| 62 |
-
file_path = Path("{local_file}")
|
| 63 |
exists = file_path.exists()
|
| 64 |
print(f"File exists in sandbox: {{exists}}")
|
| 65 |
if exists:
|
|
|
|
| 56 |
print(f"Local file exists: {local_file}")
|
| 57 |
print(f"Can read locally: {local_file.exists()}")
|
| 58 |
|
| 59 |
+
# Try to access it from sandbox (use POSIX path for Windows compatibility)
|
| 60 |
code = f"""
|
| 61 |
from pathlib import Path
|
| 62 |
+
file_path = Path("{local_file.as_posix()}")
|
| 63 |
exists = file_path.exists()
|
| 64 |
print(f"File exists in sandbox: {{exists}}")
|
| 65 |
if exists:
|
src/agents/analysis_agent.py
CHANGED
|
@@ -235,16 +235,21 @@ Generate executable Python code only (no markdown, no explanations).
|
|
| 235 |
execution_result: dict[str, Any],
|
| 236 |
) -> AnalysisResult:
|
| 237 |
"""Interpret code execution results using LLM."""
|
| 238 |
-
|
|
|
|
|
|
|
| 239 |
stdout = execution_result["stdout"]
|
|
|
|
| 240 |
verdict = "INCONCLUSIVE" # Default
|
| 241 |
|
| 242 |
-
#
|
| 243 |
-
if "
|
|
|
|
|
|
|
| 244 |
verdict = "SUPPORTED"
|
| 245 |
-
elif "
|
| 246 |
verdict = "REFUTED"
|
| 247 |
-
elif "
|
| 248 |
verdict = "INCONCLUSIVE"
|
| 249 |
|
| 250 |
# Parse key findings from output
|
|
|
|
| 235 |
execution_result: dict[str, Any],
|
| 236 |
) -> AnalysisResult:
|
| 237 |
"""Interpret code execution results using LLM."""
|
| 238 |
+
import re
|
| 239 |
+
|
| 240 |
+
# Extract verdict from output using robust word-boundary matching
|
| 241 |
stdout = execution_result["stdout"]
|
| 242 |
+
stdout_upper = stdout.upper()
|
| 243 |
verdict = "INCONCLUSIVE" # Default
|
| 244 |
|
| 245 |
+
# Avoid false positives like "NOT SUPPORTED" or "UNSUPPORTED"
|
| 246 |
+
if re.search(r"\bSUPPORTED\b", stdout_upper) and not re.search(
|
| 247 |
+
r"\b(?:NOT|UN)SUPPORTED\b", stdout_upper
|
| 248 |
+
):
|
| 249 |
verdict = "SUPPORTED"
|
| 250 |
+
elif re.search(r"\bREFUTED\b", stdout_upper):
|
| 251 |
verdict = "REFUTED"
|
| 252 |
+
elif re.search(r"\bINCONCLUSIVE\b", stdout_upper):
|
| 253 |
verdict = "INCONCLUSIVE"
|
| 254 |
|
| 255 |
# Parse key findings from output
|
src/services/llamaindex_rag.py
CHANGED
|
@@ -15,7 +15,13 @@ logger = structlog.get_logger()
|
|
| 15 |
|
| 16 |
|
| 17 |
class LlamaIndexRAGService:
|
| 18 |
-
"""RAG service using LlamaIndex with ChromaDB vector store.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def __init__(
|
| 21 |
self,
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
class LlamaIndexRAGService:
|
| 18 |
+
"""RAG service using LlamaIndex with ChromaDB vector store.
|
| 19 |
+
|
| 20 |
+
Note:
|
| 21 |
+
This service is currently OpenAI-only. It uses OpenAI embeddings and LLM
|
| 22 |
+
regardless of the global `settings.llm_provider` configuration.
|
| 23 |
+
Requires OPENAI_API_KEY to be set.
|
| 24 |
+
"""
|
| 25 |
|
| 26 |
def __init__(
|
| 27 |
self,
|
src/tools/code_execution.py
CHANGED
|
@@ -5,6 +5,7 @@ It's designed for running LLM-generated statistical analysis code safely.
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
|
|
|
| 8 |
from typing import Any
|
| 9 |
|
| 10 |
import structlog
|
|
@@ -120,12 +121,13 @@ class ModalCodeExecutor:
|
|
| 120 |
app=app,
|
| 121 |
image=scientific_image,
|
| 122 |
timeout=timeout,
|
| 123 |
-
#
|
| 124 |
)
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
| 129 |
import sys
|
| 130 |
import io
|
| 131 |
from contextlib import redirect_stdout, redirect_stderr
|
|
@@ -148,15 +150,15 @@ print(stderr_io.getvalue(), file=sys.stderr)
|
|
| 148 |
print("__STDERR_END__", file=sys.stderr)
|
| 149 |
"""
|
| 150 |
|
| 151 |
-
|
| 152 |
-
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
|
| 161 |
# Parse output
|
| 162 |
success = "__EXECUTION_SUCCESS__" in stdout_raw
|
|
@@ -248,13 +250,7 @@ print(json.dumps({{"__RESULT__": result}}))
|
|
| 248 |
return text.strip()
|
| 249 |
|
| 250 |
|
| 251 |
-
|
| 252 |
-
_executor: ModalCodeExecutor | None = None
|
| 253 |
-
|
| 254 |
-
|
| 255 |
def get_code_executor() -> ModalCodeExecutor:
|
| 256 |
-
"""Get or create singleton code executor instance."""
|
| 257 |
-
|
| 258 |
-
if _executor is None:
|
| 259 |
-
_executor = ModalCodeExecutor()
|
| 260 |
-
return _executor
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
import os
|
| 8 |
+
from functools import lru_cache
|
| 9 |
from typing import Any
|
| 10 |
|
| 11 |
import structlog
|
|
|
|
| 121 |
app=app,
|
| 122 |
image=scientific_image,
|
| 123 |
timeout=timeout,
|
| 124 |
+
block_network=not allow_network, # Wire the network control
|
| 125 |
)
|
| 126 |
|
| 127 |
+
try:
|
| 128 |
+
# Execute the code
|
| 129 |
+
# Wrap code to capture result
|
| 130 |
+
wrapped_code = f"""
|
| 131 |
import sys
|
| 132 |
import io
|
| 133 |
from contextlib import redirect_stdout, redirect_stderr
|
|
|
|
| 150 |
print("__STDERR_END__", file=sys.stderr)
|
| 151 |
"""
|
| 152 |
|
| 153 |
+
# Run the wrapped code
|
| 154 |
+
process = sandbox.exec("python", "-c", wrapped_code, timeout=timeout)
|
| 155 |
|
| 156 |
+
# Read output
|
| 157 |
+
stdout_raw = process.stdout.read()
|
| 158 |
+
stderr_raw = process.stderr.read()
|
| 159 |
+
finally:
|
| 160 |
+
# Always clean up sandbox to prevent resource leaks
|
| 161 |
+
sandbox.terminate()
|
| 162 |
|
| 163 |
# Parse output
|
| 164 |
success = "__EXECUTION_SUCCESS__" in stdout_raw
|
|
|
|
| 250 |
return text.strip()
|
| 251 |
|
| 252 |
|
| 253 |
+
@lru_cache(maxsize=1)
|
|
|
|
|
|
|
|
|
|
| 254 |
def get_code_executor() -> ModalCodeExecutor:
|
| 255 |
+
"""Get or create singleton code executor instance (thread-safe via lru_cache)."""
|
| 256 |
+
return ModalCodeExecutor()
|
|
|
|
|
|
|
|
|