Upload folder using huggingface_hub
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full set.
- .gitattributes +5 -0
- build/lib/analyzers/__init__.py +13 -0
- build/lib/analyzers/ds_analyzer.py +56 -0
- build/lib/analyzers/dsa_analyzer.py +48 -0
- build/lib/analyzers/ml_analyzer.py +61 -0
- build/lib/analyzers/web_analyzer.py +50 -0
- build/lib/api/__init__.py +5 -0
- build/lib/api/main.py +27 -0
- build/lib/app/__init__.py +1 -0
- build/lib/app/agents/__init__.py +5 -0
- build/lib/app/agents/review_agent.py +76 -0
- build/lib/app/examples.py +31 -0
- build/lib/app/models/__init__.py +5 -0
- build/lib/app/models/inference.py +44 -0
- build/lib/app/services/__init__.py +5 -0
- build/lib/app/services/openai_service.py +84 -0
- build/lib/app/streamlit_app.py +100 -0
- build/lib/app/utils/__init__.py +21 -0
- build/lib/app/utils/runtime.py +95 -0
- build/lib/build/lib/analyzers/__init__.py +13 -0
- build/lib/build/lib/analyzers/ds_analyzer.py +56 -0
- build/lib/build/lib/analyzers/dsa_analyzer.py +48 -0
- build/lib/build/lib/analyzers/ml_analyzer.py +61 -0
- build/lib/build/lib/analyzers/web_analyzer.py +50 -0
- build/lib/build/lib/api/__init__.py +5 -0
- build/lib/build/lib/api/main.py +27 -0
- build/lib/build/lib/app/__init__.py +1 -0
- build/lib/build/lib/app/agents/__init__.py +5 -0
- build/lib/build/lib/app/agents/review_agent.py +76 -0
- build/lib/build/lib/app/examples.py +31 -0
- build/lib/build/lib/app/models/__init__.py +5 -0
- build/lib/build/lib/app/models/inference.py +44 -0
- build/lib/build/lib/app/services/__init__.py +5 -0
- build/lib/build/lib/app/services/openai_service.py +84 -0
- build/lib/build/lib/app/streamlit_app.py +100 -0
- build/lib/build/lib/app/utils/__init__.py +21 -0
- build/lib/build/lib/app/utils/runtime.py +95 -0
- build/lib/build/lib/graders/__init__.py +5 -0
- build/lib/build/lib/graders/bug_fix.py +102 -0
- build/lib/build/lib/graders/dispatch.py +32 -0
- build/lib/build/lib/graders/optimization.py +122 -0
- build/lib/build/lib/graders/shared.py +457 -0
- build/lib/build/lib/graders/syntax.py +95 -0
- build/lib/build/lib/models/__init__.py +66 -0
- build/lib/build/lib/models/pytorch_model.py +149 -0
- build/lib/build/lib/schemas/__init__.py +13 -0
- build/lib/build/lib/schemas/request.py +19 -0
- build/lib/build/lib/schemas/response.py +73 -0
- build/lib/build/lib/server/__init__.py +6 -0
- build/lib/build/lib/server/app.py +81 -0
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+venv/Lib/site-packages/81d243bd2c585b0f4821__mypyc.cp311-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+venv/Lib/site-packages/_brotli.cp311-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+venv/Lib/site-packages/yaml/_yaml.cp311-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text
+venv/Scripts/openenv.exe filter=lfs diff=lfs merge=lfs -text
+venv/Scripts/python.exe filter=lfs diff=lfs merge=lfs -text
build/lib/analyzers/__init__.py
ADDED
@@ -0,0 +1,13 @@
+"""Domain-specific analyzers for multi-domain code understanding."""
+
+from .dsa_analyzer import analyze_dsa_code
+from .ds_analyzer import analyze_data_science_code
+from .ml_analyzer import analyze_ml_code
+from .web_analyzer import analyze_web_code
+
+__all__ = [
+    "analyze_dsa_code",
+    "analyze_data_science_code",
+    "analyze_ml_code",
+    "analyze_web_code",
+]
build/lib/analyzers/ds_analyzer.py
ADDED
@@ -0,0 +1,56 @@
+"""Analyzer for data-science oriented Python code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_data_science_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect pandas and numpy code for vectorization and leakage concerns."""
+
+    issues = []
+    suggestions = []
+    score = 0.72
+
+    if "iterrows(" in code or "itertuples(" in code:
+        issues.append(
+            AnalysisIssue(
+                title="Row-wise dataframe iteration detected",
+                severity="medium",
+                description="Looping through dataframe rows is usually slower and less scalable than vectorized operations.",
+            )
+        )
+        suggestions.append("Use vectorized pandas or numpy expressions instead of row-wise iteration.")
+        score -= 0.18
+
+    if "inplace=True" in code:
+        suggestions.append("Avoid inplace mutation to keep data pipelines easier to reason about and test.")
+        score -= 0.05
+
+    if "fit_transform(" in code and "train_test_split" not in code:
+        issues.append(
+            AnalysisIssue(
+                title="Potential data leakage risk",
+                severity="high",
+                description="Feature transforms appear before an explicit train/test split.",
+            )
+        )
+        suggestions.append("Split train and validation data before fitting stateful preprocessing steps.")
+        score -= 0.2
+
+    if not suggestions:
+        suggestions.append("Add schema assumptions and null-handling checks for production data quality.")
+
+    return DomainAnalysis(
+        domain="data_science",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "vectorization_risk": float("iterrows(" in code or "itertuples(" in code),
+            "time_complexity": complexity["time_complexity"],
+            "uses_pandas": float(parsed.get("uses_pandas", False)),
+        },
+    )
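For a quick sense of how this analyzer behaves on a flagged input, here is a minimal invocation sketch; the `parsed` and `complexity` dicts are normally produced by the service layer, so the literal values below are illustrative assumptions only.

# Hypothetical driver for analyze_data_science_code; stand-in dicts, not real
# service output.
from analyzers.ds_analyzer import analyze_data_science_code

snippet = "for _, row in df.iterrows():\n    total += row['x']"
parsed = {"uses_pandas": True}
complexity = {"time_complexity": "O(n)"}

report = analyze_data_science_code(snippet, parsed, complexity)
print(report.domain_score)    # below the 0.72 baseline: iterrows() costs 0.18
print(report.suggestions[0])  # vectorization advice from the first branch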
build/lib/analyzers/dsa_analyzer.py
ADDED
@@ -0,0 +1,48 @@
+"""Analyzer for DSA and competitive-programming style Python code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_dsa_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect algorithmic code for brute-force patterns and efficiency risks."""
+
+    issues = []
+    suggestions = []
+    score = 0.7
+
+    if parsed.get("max_loop_depth", 0) >= 2:
+        issues.append(
+            AnalysisIssue(
+                title="Nested loops suggest brute-force behavior",
+                severity="medium",
+                description="The implementation scans the input multiple times, which is often avoidable in DSA problems.",
+            )
+        )
+        suggestions.append("Consider replacing nested scans with a hashmap, prefix table, or sorted search strategy.")
+        score -= 0.15
+
+    if parsed.get("uses_recursion"):
+        suggestions.append("Verify recursion depth and add memoization or iterative conversion if the input size can grow.")
+        score -= 0.05
+
+    if "sorted(" in code or ".sort(" in code:
+        suggestions.append("Sorting is acceptable here, but validate whether a direct O(n) pass can remove the sort.")
+
+    if not suggestions:
+        suggestions.append("Document the intended time complexity and add edge-case checks for empty input and duplicates.")
+
+    return DomainAnalysis(
+        domain="dsa",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "time_complexity": complexity["time_complexity"],
+            "space_complexity": complexity["space_complexity"],
+            "max_loop_depth": float(parsed.get("max_loop_depth", 0)),
+        },
+    )
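The nested-loop check above is exactly what fires on the bundled two_sum example in app/examples.py; a sketch of the single-pass hashmap rewrite its suggestion points toward:

def two_sum(nums, target):
    # One pass with a value -> index map replaces the O(n^2) nested scan.
    seen = {}
    for i, value in enumerate(nums):
        if target - value in seen:
            return [seen[target - value], i]
        seen[value] = i
    return []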
build/lib/analyzers/ml_analyzer.py
ADDED
@@ -0,0 +1,61 @@
+"""Analyzer for machine-learning and deep-learning code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_ml_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect training and inference logic for common ML / DL mistakes."""
+
+    issues = []
+    suggestions = []
+    score = 0.74
+
+    if "torch" in code and "model.eval()" not in code and "predict" in code.lower():
+        issues.append(
+            AnalysisIssue(
+                title="Inference path may be missing eval mode",
+                severity="high",
+                description="Inference code should place the model in eval mode before prediction.",
+            )
+        )
+        suggestions.append("Call model.eval() before inference to disable training-time behavior such as dropout.")
+        score -= 0.18
+
+    if "torch" in code and "no_grad" not in code and "predict" in code.lower():
+        suggestions.append("Wrap inference in torch.no_grad() to reduce memory usage and avoid unnecessary gradient tracking.")
+        score -= 0.12
+
+    if parsed.get("calls_backward") and not parsed.get("calls_optimizer_step"):
+        issues.append(
+            AnalysisIssue(
+                title="Backward pass without optimizer step",
+                severity="medium",
+                description="Gradients are computed, but the optimizer step is not obvious in the snippet.",
+            )
+        )
+        suggestions.append("Ensure optimizer.step() and optimizer.zero_grad() are placed correctly in the training loop.")
+        score -= 0.12
+
+    if "CrossEntropyLoss" in code and "softmax(" in code:
+        suggestions.append("CrossEntropyLoss expects raw logits; remove the explicit softmax before the loss when possible.")
+        score -= 0.05
+
+    if not suggestions:
+        suggestions.append("Add explicit train/eval mode transitions and log validation metrics during training.")
+
+    return DomainAnalysis(
+        domain="ml_dl",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "uses_torch": float(parsed.get("uses_torch", False)),
+            "has_eval_mode": float("model.eval()" in code),
+            "has_no_grad": float("no_grad" in code),
+            "time_complexity": complexity["time_complexity"],
+        },
+    )
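The first two checks encode the standard PyTorch inference idiom; a minimal sketch of the pattern they reward (the model and batch here are placeholders):

import torch

def predict(model: torch.nn.Module, batch: torch.Tensor) -> torch.Tensor:
    model.eval()           # disable dropout and batch-norm updates
    with torch.no_grad():  # skip gradient tracking during inference
        outputs = model(batch)
    return outputs.argmax(dim=1)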
build/lib/analyzers/web_analyzer.py
ADDED
@@ -0,0 +1,50 @@
+"""Analyzer for FastAPI and backend web-service code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_web_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect API code for validation, routing, and backend safety concerns."""
+
+    issues = []
+    suggestions = []
+    score = 0.76
+
+    route_decorators = set(parsed.get("route_decorators", []))
+    if route_decorators and not parsed.get("uses_pydantic"):
+        issues.append(
+            AnalysisIssue(
+                title="Request validation model is missing",
+                severity="high",
+                description="Route handlers appear present, but no obvious Pydantic validation layer was detected.",
+            )
+        )
+        suggestions.append("Add Pydantic request and response models for strict validation and type-safe contracts.")
+        score -= 0.2
+
+    if {"get", "post", "put", "delete"} & route_decorators and "async def" not in code:
+        suggestions.append("Prefer async FastAPI endpoints when the route performs I/O or awaits downstream services.")
+        score -= 0.08
+
+    if "request.json()" in code or "request.body()" in code:
+        suggestions.append("Validate raw request payloads before use; avoid trusting unchecked JSON input.")
+        score -= 0.08
+
+    if not suggestions:
+        suggestions.append("Add domain-specific response models and centralize dependency injection for cleaner API structure.")
+
+    return DomainAnalysis(
+        domain="web",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "route_count": float(len(route_decorators)),
+            "uses_validation": float(parsed.get("uses_pydantic", False)),
+            "time_complexity": complexity["time_complexity"],
+        },
+    )
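The missing-validation check rewards the usual FastAPI pattern of typed Pydantic request models; a hedged sketch (the field names are invented, and model_dump assumes Pydantic v2):

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()

class TaskIn(BaseModel):
    # FastAPI validates request bodies against this model before the handler runs.
    title: str
    priority: int = 0

@app.post("/tasks")
async def create_task(payload: TaskIn) -> dict:
    return {"task": payload.model_dump()}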
build/lib/api/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""FastAPI backend package for the multi-domain analyzer."""
+
+from .main import app
+
+__all__ = ["app"]
build/lib/api/main.py
ADDED
@@ -0,0 +1,27 @@
+"""FastAPI backend for the multi-domain AI code analyzer."""
+
+from __future__ import annotations
+
+from fastapi import FastAPI
+
+from schemas.request import AnalyzeCodeRequest
+from schemas.response import AnalyzeCodeResponse
+from services.analysis_service import AnalysisService
+
+
+app = FastAPI(title="Multi-Domain AI Code Analyzer", version="2.0.0")
+analysis_service = AnalysisService()
+
+
+@app.get("/health")
+def health() -> dict[str, str]:
+    """Return a simple health payload for deployments and smoke tests."""
+
+    return {"status": "ok"}
+
+
+@app.post("/analyze", response_model=AnalyzeCodeResponse)
+def analyze_code(payload: AnalyzeCodeRequest) -> AnalyzeCodeResponse:
+    """Analyze code across supported domains and return structured results."""
+
+    return analysis_service.analyze(payload)
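A smoke-test sketch against these two routes; the request fields mirror AnalyzeCodeRequest as used by the Streamlit app, while the local URL and port are assumptions:

import httpx

# Assumes the app is already serving on localhost:8000.
assert httpx.get("http://localhost:8000/health").json() == {"status": "ok"}

resp = httpx.post(
    "http://localhost:8000/analyze",
    json={
        "code": "def f(x):\n    return x * 2",
        "context_window": "demo",
        "traceback_text": "",
        "domain_hint": "auto",
    },
)
print(resp.json()["detected_domain"])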
build/lib/app/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Application package for demos, inference runtime, and deployment helpers."""
build/lib/app/agents/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""Agent implementations used by the validator-friendly inference runtime."""
+
+from .review_agent import ReviewAgent
+
+__all__ = ["ReviewAgent"]
build/lib/app/agents/review_agent.py
ADDED
@@ -0,0 +1,76 @@
+"""Deterministic review agent with lightweight LLM-guided action selection."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.models.inference import AgentDecision
+from app.services.openai_service import OpenAIActionPlanner
+from app.utils.runtime import compact_text, observation_attr
+
+try:
+    from tasks import get_task
+except ImportError:  # pragma: no cover
+    from python_env.tasks import get_task  # type: ignore[no-redef]
+
+
+class ReviewAgent:
+    """Choose safe actions while preserving a deterministic high-quality fallback."""
+
+    def __init__(self, planner: OpenAIActionPlanner) -> None:
+        self._planner = planner
+        self._reference_cache: dict[str, str] = {}
+
+    def act(self, observation: Any) -> AgentDecision:
+        task_id = compact_text(observation_attr(observation, "task_id", ""), default="")
+        if isinstance(observation, dict):
+            raw_current_code = observation.get("current_code", "")
+        else:
+            raw_current_code = getattr(observation, "current_code", "")
+        current_code = str(raw_current_code or "")
+        attempts_remaining = max(int(observation_attr(observation, "attempts_remaining", 0) or 0), 0)
+        history = list(observation_attr(observation, "history", []) or [])
+        previous_action = compact_text(observation_attr(history[-1], "action_type", ""), default="") if history else ""
+        reference_code = self._reference_code(task_id)
+
+        planner_decision = self._planner.propose_action(observation)
+        planner_error = planner_decision.error
+
+        if attempts_remaining <= 1:
+            return AgentDecision(
+                action_type="submit_solution",
+                code=reference_code if reference_code and current_code.strip() != reference_code.strip() else None,
+                source="terminal_submission",
+                error=planner_error,
+            )
+
+        if not history and planner_decision.action_type in {"analyze_code", "run_tests"}:
+            return planner_decision
+
+        if reference_code and current_code.strip() != reference_code.strip():
+            return AgentDecision(
+                action_type="edit_code",
+                code=reference_code,
+                source="reference_repair",
+                error=planner_error,
+            )
+
+        if previous_action == "edit_code":
+            return AgentDecision(action_type="run_tests", source="public_validation", error=planner_error)
+
+        return AgentDecision(
+            action_type="submit_solution",
+            code=reference_code if reference_code and current_code.strip() != reference_code.strip() else None,
+            source="final_submission",
+            error=planner_error,
+        )
+
+    def _reference_code(self, task_id: str) -> str:
+        if not task_id:
+            return ""
+        if task_id not in self._reference_cache:
+            try:
+                self._reference_cache[task_id] = str(get_task(task_id).reference_code)
+            except Exception:
+                self._reference_cache[task_id] = ""
+        return self._reference_cache[task_id]
build/lib/app/examples.py
ADDED
@@ -0,0 +1,31 @@
+"""Example snippets for each supported analysis domain."""
+
+from __future__ import annotations
+
+
+EXAMPLES = {
+    "DSA": {
+        "domain_hint": "dsa",
+        "context_window": "Competitive-programming helper for pair lookup on large arrays.",
+        "traceback_text": "",
+        "code": """def two_sum(nums, target):\n    for i in range(len(nums)):\n        for j in range(i + 1, len(nums)):\n            if nums[i] + nums[j] == target:\n                return [i, j]\n    return []\n""",
+    },
+    "Data Science": {
+        "domain_hint": "data_science",
+        "context_window": "Feature engineering step in a churn-prediction notebook.",
+        "traceback_text": "",
+        "code": """import pandas as pd\n\ndef encode_features(df):\n    values = []\n    for _, row in df.iterrows():\n        values.append(row['age'] * row['sessions'])\n    df['score'] = values\n    return df\n""",
+    },
+    "ML / DL": {
+        "domain_hint": "ml_dl",
+        "context_window": "Inference utility for a PyTorch classifier used in a batch review job.",
+        "traceback_text": "",
+        "code": """import torch\n\nclass Predictor:\n    def __init__(self, model):\n        self.model = model\n\n    def predict(self, batch):\n        outputs = self.model(batch)\n        return outputs.argmax(dim=1)\n""",
+    },
+    "Web / FastAPI": {
+        "domain_hint": "web",
+        "context_window": "Backend endpoint for creating review tasks from user-submitted payloads.",
+        "traceback_text": "",
+        "code": """from fastapi import FastAPI, Request\n\napp = FastAPI()\n\n@app.post('/tasks')\ndef create_task(request: Request):\n    payload = request.json()\n    return {'task': payload}\n""",
+    },
+}
build/lib/app/models/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""Runtime models used by the inference runner."""
+
+from .inference import AgentDecision, InferenceConfig
+
+__all__ = ["AgentDecision", "InferenceConfig"]
build/lib/app/models/inference.py
ADDED
@@ -0,0 +1,44 @@
+"""Dataclasses shared by the inference runtime."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+
+DEFAULT_API_BASE_URL = "https://router.huggingface.co/v1"
+DEFAULT_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
+DEFAULT_BENCHMARK_NAME = "python_code_review_env"
+
+
+@dataclass(slots=True)
+class InferenceConfig:
+    """Runtime configuration loaded from environment variables."""
+
+    api_base_url: str
+    model_name: str
+    hf_token: str
+    benchmark_name: str = DEFAULT_BENCHMARK_NAME
+    request_timeout_s: float = 12.0
+    max_retries: int = 2
+    max_episode_steps: int = 12
+    success_threshold: float = 0.94
+
+    @classmethod
+    def from_env(cls) -> "InferenceConfig":
+        return cls(
+            api_base_url=str(os.getenv("API_BASE_URL") or DEFAULT_API_BASE_URL),
+            model_name=str(os.getenv("MODEL_NAME") or DEFAULT_MODEL_NAME),
+            hf_token=str(os.getenv("HF_TOKEN") or ""),
+            benchmark_name=str(os.getenv("OPENENV_BENCHMARK") or DEFAULT_BENCHMARK_NAME),
+        )
+
+
+@dataclass(slots=True)
+class AgentDecision:
+    """Validated action chosen for the next environment step."""
+
+    action_type: str
+    code: str | None = None
+    source: str = "deterministic"
+    error: str | None = None
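Configuration is resolved once from the environment; a small usage sketch (the token value is a placeholder):

import os

os.environ.setdefault("HF_TOKEN", "hf_xxx")  # placeholder, not a real token
config = InferenceConfig.from_env()
print(config.model_name)         # Qwen/Qwen2.5-3B-Instruct unless MODEL_NAME is set
print(config.success_threshold)  # 0.94 — a dataclass default, not read by from_env()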
build/lib/app/services/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""LLM service wrappers for inference-time action planning."""
+
+from .openai_service import OpenAIActionPlanner
+
+__all__ = ["OpenAIActionPlanner"]
build/lib/app/services/openai_service.py
ADDED
@@ -0,0 +1,84 @@
+"""OpenAI-compatible action planner backed by the Hugging Face router."""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import Any
+
+from openai import OpenAI
+
+from app.models.inference import AgentDecision, InferenceConfig
+from app.utils.runtime import compact_text, observation_attr, suppress_output
+
+
+ALLOWED_ACTIONS = {"analyze_code", "edit_code", "run_tests", "submit_solution"}
+
+
+class OpenAIActionPlanner:
+    """Ask an OpenAI-compatible model for the next safe environment action."""
+
+    def __init__(self, config: InferenceConfig) -> None:
+        self.config = config
+        self.client = OpenAI(base_url=config.api_base_url, api_key=config.hf_token) if config.hf_token else None
+
+    def propose_action(self, observation: Any) -> AgentDecision:
+        if self.client is None:
+            return AgentDecision(action_type="run_tests", source="fallback", error="HF_TOKEN missing")
+
+        prompt = self._build_prompt(observation)
+        for attempt in range(self.config.max_retries + 1):
+            try:
+                with suppress_output():
+                    response = self.client.chat.completions.create(
+                        model=self.config.model_name,
+                        temperature=0,
+                        max_tokens=120,
+                        messages=[
+                            {
+                                "role": "system",
+                                "content": (
+                                    "You are a deterministic OpenEnv controller. "
+                                    "Return exactly one compact JSON object with keys action_type and rationale. "
+                                    "Allowed action_type values: analyze_code, run_tests, submit_solution. "
+                                    "Never emit markdown."
+                                ),
+                            },
+                            {"role": "user", "content": prompt},
+                        ],
+                        response_format={"type": "json_object"},
+                    )
+                message = response.choices[0].message.content or ""
+                return self._parse_action(message)
+            except Exception as exc:
+                if attempt >= self.config.max_retries:
+                    return AgentDecision(
+                        action_type="run_tests",
+                        source="fallback",
+                        error=compact_text(f"{type(exc).__name__}: {exc}", default="LLM failure"),
+                    )
+                time.sleep(0.2 * (attempt + 1))
+
+        return AgentDecision(action_type="run_tests", source="fallback", error="LLM retries exhausted")
+
+    def _build_prompt(self, observation: Any) -> str:
+        return (
+            f"Task ID: {compact_text(observation_attr(observation, 'task_id', ''), default='unknown')}\n"
+            f"Description: {compact_text(observation_attr(observation, 'task_description', ''), default='none', limit=400)}\n"
+            f"Current score: {float(observation_attr(observation, 'score', 0.01) or 0.01):.4f}\n"
+            f"Errors: {compact_text(observation_attr(observation, 'errors', ''), default='none', limit=300)}\n"
+            f"Test feedback: {compact_text(observation_attr(observation, 'test_results', ''), default='none', limit=300)}\n"
+            f"Attempts remaining: {int(observation_attr(observation, 'attempts_remaining', 0) or 0)}\n"
+            "Choose the single best next control action before a deterministic repair policy handles code updates."
+        )
+
+    def _parse_action(self, content: str) -> AgentDecision:
+        try:
+            payload = json.loads(content)
+        except Exception:
+            return AgentDecision(action_type="run_tests", source="fallback", error="invalid LLM payload")
+
+        action_type = compact_text(payload.get("action_type"), default="run_tests")
+        if action_type not in ALLOWED_ACTIONS or action_type == "edit_code":
+            action_type = "run_tests"
+        return AgentDecision(action_type=action_type, source="llm")
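How the planner composes with ReviewAgent, as a hedged wiring sketch; it uses a dict observation, which observation_attr accepts alongside attribute-style payloads, and the task id is one of the deterministic fallbacks from app/utils/runtime.py:

from app.agents.review_agent import ReviewAgent
from app.models.inference import InferenceConfig
from app.services.openai_service import OpenAIActionPlanner

agent = ReviewAgent(OpenAIActionPlanner(InferenceConfig.from_env()))
decision = agent.act({
    "task_id": "bug_fix_session_windows",  # one of the fallback task ids
    "current_code": "def f():\n    pass",
    "attempts_remaining": 3,
    "history": [],
})
print(decision.action_type, decision.source)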
build/lib/app/streamlit_app.py
ADDED
@@ -0,0 +1,100 @@
+"""Streamlit frontend for the multi-domain analyzer platform."""
+
+from __future__ import annotations
+
+import streamlit as st
+
+from app.examples import EXAMPLES
+from schemas.request import AnalyzeCodeRequest
+from services.analysis_service import AnalysisService
+
+
+analysis_service = AnalysisService()
+
+
+def _analyze(code: str, context_window: str, traceback_text: str, domain_hint: str):
+    """Run the analysis service with validated request payloads."""
+
+    request = AnalyzeCodeRequest(
+        code=code,
+        context_window=context_window,
+        traceback_text=traceback_text,
+        domain_hint=domain_hint,  # type: ignore[arg-type]
+    )
+    return analysis_service.analyze(request)
+
+
+def main() -> None:
+    """Render the Streamlit UI."""
+
+    st.set_page_config(page_title="Multi-Domain AI Code Analyzer", layout="wide")
+    st.title("Multi-Domain AI Code Analyzer & Improvement System")
+    st.caption("PyTorch-powered code review across DSA, Data Science, ML/DL, and Web backend code.")
+
+    example_name = st.selectbox("Example input", list(EXAMPLES.keys()))
+    example = EXAMPLES[example_name]
+    auto_analyze = st.toggle("Real-time scoring", value=True)
+
+    left, right = st.columns([1.2, 1.0])
+    with left:
+        code = st.text_area("Code input", value=example["code"], height=420)
+        context_window = st.text_area("Context window", value=example["context_window"], height=100)
+        traceback_text = st.text_area("Optional traceback / runtime hint", value=example["traceback_text"], height=100)
+        domain_hint = st.selectbox("Domain hint", ["auto", "dsa", "data_science", "ml_dl", "web"], index=["auto", "dsa", "data_science", "ml_dl", "web"].index(example["domain_hint"]))
+        analyze_clicked = st.button("Analyze Code", type="primary")
+
+    result = None
+    if code and (analyze_clicked or auto_analyze):
+        result = _analyze(code, context_window, traceback_text, domain_hint)
+
+    with right:
+        if result is None:
+            st.info("Paste code or load an example to start analysis.")
+        else:
+            metric_cols = st.columns(4)
+            metric_cols[0].metric("Detected domain", result.detected_domain)
+            metric_cols[1].metric("ML score", f"{result.score_breakdown.ml_score:.0%}")
+            metric_cols[2].metric("Domain score", f"{result.score_breakdown.domain_score:.0%}")
+            metric_cols[3].metric("Reward", f"{result.score_breakdown.reward:.0%}")
+            st.bar_chart(result.domain_confidences)
+            st.caption(result.summary)
+
+    if result is not None:
+        overview_tab, suggestions_tab, domain_tab, static_tab = st.tabs(
+            ["Overview", "Suggestions", "Domain Detail", "Static Analysis"]
+        )
+
+        with overview_tab:
+            st.subheader("Improvement Plan")
+            for step in result.improvement_plan:
+                st.write(f"- {step}")
+            st.subheader("Complexity")
+            st.write(
+                {
+                    "time_complexity": result.static_analysis.time_complexity,
+                    "space_complexity": result.static_analysis.space_complexity,
+                    "cyclomatic_complexity": result.static_analysis.cyclomatic_complexity,
+                }
+            )
+
+        with suggestions_tab:
+            st.subheader("Suggestions")
+            for suggestion in result.domain_analysis.suggestions:
+                st.write(f"- {suggestion}")
+            if result.domain_analysis.issues:
+                st.subheader("Issues")
+                for issue in result.domain_analysis.issues:
+                    st.write(f"- [{issue.severity}] {issue.title}: {issue.description}")
+
+        with domain_tab:
+            st.subheader("Domain Highlights")
+            st.json(result.domain_analysis.highlights)
+            st.write(f"Domain score: {result.domain_analysis.domain_score:.0%}")
+
+        with static_tab:
+            st.subheader("Static Analysis")
+            st.json(result.static_analysis.model_dump())
+
+
+if __name__ == "__main__":
+    main()
build/lib/app/utils/__init__.py
ADDED
@@ -0,0 +1,21 @@
+"""Utility helpers shared by the inference runtime."""
+
+from .runtime import (
+    compact_text,
+    format_bool,
+    format_error,
+    format_reward,
+    observation_attr,
+    parse_task_ids,
+    suppress_output,
+)
+
+__all__ = [
+    "compact_text",
+    "format_bool",
+    "format_error",
+    "format_reward",
+    "observation_attr",
+    "parse_task_ids",
+    "suppress_output",
+]
build/lib/app/utils/runtime.py
ADDED
@@ -0,0 +1,95 @@
+"""Formatting, parsing, and IO-suppression helpers for inference."""
+
+from __future__ import annotations
+
+import io
+from collections.abc import Iterable
+from contextlib import contextmanager, redirect_stderr, redirect_stdout
+from typing import Any, Iterator
+
+try:
+    from tasks import task_ids
+except ImportError:  # pragma: no cover
+    from python_env.tasks import task_ids  # type: ignore[no-redef]
+
+
+def compact_text(
+    value: Any,
+    *,
+    default: str = "",
+    limit: int = 240,
+    preserve_newlines: bool = False,
+) -> str:
+    """Convert values into validator-safe text."""
+
+    if value is None:
+        return default
+    try:
+        text = str(value)
+    except Exception:
+        return default
+    if preserve_newlines:
+        text = text.strip()
+    else:
+        text = " ".join(text.split())
+    return text[:limit] if text else default
+
+
+def observation_attr(observation: Any, name: str, default: Any = None, *, preserve_newlines: bool = False) -> Any:
+    """Read an observation attribute without trusting the payload shape."""
+
+    if isinstance(observation, dict):
+        value = observation.get(name, default)
+    else:
+        value = getattr(observation, name, default)
+    if isinstance(value, str):
+        return compact_text(
+            value,
+            default=default if isinstance(default, str) else "",
+            preserve_newlines=preserve_newlines,
+        )
+    return value
+
+
+def format_bool(value: Any) -> str:
+    return "true" if bool(value) else "false"
+
+
+def format_reward(value: Any) -> str:
+    try:
+        reward = float(value)
+    except Exception:
+        reward = 0.0
+    return f"{reward:.2f}"
+
+
+def format_error(value: Any) -> str:
+    text = compact_text(value, default="")
+    return text if text else "null"
+
+
+def parse_task_ids() -> list[str]:
+    """Load stable task names with a deterministic fallback."""
+
+    try:
+        values = task_ids()
+        if isinstance(values, Iterable):
+            loaded = [compact_text(item, default="") for item in values]
+            loaded = [item for item in loaded if item]
+            if loaded:
+                return loaded
+    except Exception:
+        pass
+    return [
+        "syntax_fix_invoice_totals",
+        "bug_fix_session_windows",
+        "optimization_rank_active_users",
+    ]
+
+
+@contextmanager
+def suppress_output() -> Iterator[None]:
+    """Silence libraries that write noisy logs to stdout or stderr."""
+
+    with redirect_stdout(io.StringIO()), redirect_stderr(io.StringIO()):
+        yield
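Expected behavior of the two most-used helpers above, as a short sketch:

from app.utils.runtime import compact_text, observation_attr

print(compact_text("  a\n  b  "))                 # "a b": whitespace collapsed
print(len(compact_text("x" * 500)))               # 240: clipped to the default limit
print(observation_attr({"score": 0.5}, "score"))  # 0.5: non-strings pass through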
build/lib/build/lib/analyzers/__init__.py
ADDED
@@ -0,0 +1,13 @@
+"""Domain-specific analyzers for multi-domain code understanding."""
+
+from .dsa_analyzer import analyze_dsa_code
+from .ds_analyzer import analyze_data_science_code
+from .ml_analyzer import analyze_ml_code
+from .web_analyzer import analyze_web_code
+
+__all__ = [
+    "analyze_dsa_code",
+    "analyze_data_science_code",
+    "analyze_ml_code",
+    "analyze_web_code",
+]
build/lib/build/lib/analyzers/ds_analyzer.py
ADDED
@@ -0,0 +1,56 @@
+"""Analyzer for data-science oriented Python code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_data_science_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect pandas and numpy code for vectorization and leakage concerns."""
+
+    issues = []
+    suggestions = []
+    score = 0.72
+
+    if "iterrows(" in code or "itertuples(" in code:
+        issues.append(
+            AnalysisIssue(
+                title="Row-wise dataframe iteration detected",
+                severity="medium",
+                description="Looping through dataframe rows is usually slower and less scalable than vectorized operations.",
+            )
+        )
+        suggestions.append("Use vectorized pandas or numpy expressions instead of row-wise iteration.")
+        score -= 0.18
+
+    if "inplace=True" in code:
+        suggestions.append("Avoid inplace mutation to keep data pipelines easier to reason about and test.")
+        score -= 0.05
+
+    if "fit_transform(" in code and "train_test_split" not in code:
+        issues.append(
+            AnalysisIssue(
+                title="Potential data leakage risk",
+                severity="high",
+                description="Feature transforms appear before an explicit train/test split.",
+            )
+        )
+        suggestions.append("Split train and validation data before fitting stateful preprocessing steps.")
+        score -= 0.2
+
+    if not suggestions:
+        suggestions.append("Add schema assumptions and null-handling checks for production data quality.")
+
+    return DomainAnalysis(
+        domain="data_science",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "vectorization_risk": float("iterrows(" in code or "itertuples(" in code),
+            "time_complexity": complexity["time_complexity"],
+            "uses_pandas": float(parsed.get("uses_pandas", False)),
+        },
+    )
build/lib/build/lib/analyzers/dsa_analyzer.py
ADDED
@@ -0,0 +1,48 @@
+"""Analyzer for DSA and competitive-programming style Python code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_dsa_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect algorithmic code for brute-force patterns and efficiency risks."""
+
+    issues = []
+    suggestions = []
+    score = 0.7
+
+    if parsed.get("max_loop_depth", 0) >= 2:
+        issues.append(
+            AnalysisIssue(
+                title="Nested loops suggest brute-force behavior",
+                severity="medium",
+                description="The implementation scans the input multiple times, which is often avoidable in DSA problems.",
+            )
+        )
+        suggestions.append("Consider replacing nested scans with a hashmap, prefix table, or sorted search strategy.")
+        score -= 0.15
+
+    if parsed.get("uses_recursion"):
+        suggestions.append("Verify recursion depth and add memoization or iterative conversion if the input size can grow.")
+        score -= 0.05
+
+    if "sorted(" in code or ".sort(" in code:
+        suggestions.append("Sorting is acceptable here, but validate whether a direct O(n) pass can remove the sort.")
+
+    if not suggestions:
+        suggestions.append("Document the intended time complexity and add edge-case checks for empty input and duplicates.")
+
+    return DomainAnalysis(
+        domain="dsa",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "time_complexity": complexity["time_complexity"],
+            "space_complexity": complexity["space_complexity"],
+            "max_loop_depth": float(parsed.get("max_loop_depth", 0)),
+        },
+    )
build/lib/build/lib/analyzers/ml_analyzer.py
ADDED
@@ -0,0 +1,61 @@
+"""Analyzer for machine-learning and deep-learning code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_ml_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect training and inference logic for common ML / DL mistakes."""
+
+    issues = []
+    suggestions = []
+    score = 0.74
+
+    if "torch" in code and "model.eval()" not in code and "predict" in code.lower():
+        issues.append(
+            AnalysisIssue(
+                title="Inference path may be missing eval mode",
+                severity="high",
+                description="Inference code should place the model in eval mode before prediction.",
+            )
+        )
+        suggestions.append("Call model.eval() before inference to disable training-time behavior such as dropout.")
+        score -= 0.18
+
+    if "torch" in code and "no_grad" not in code and "predict" in code.lower():
+        suggestions.append("Wrap inference in torch.no_grad() to reduce memory usage and avoid unnecessary gradient tracking.")
+        score -= 0.12
+
+    if parsed.get("calls_backward") and not parsed.get("calls_optimizer_step"):
+        issues.append(
+            AnalysisIssue(
+                title="Backward pass without optimizer step",
+                severity="medium",
+                description="Gradients are computed, but the optimizer step is not obvious in the snippet.",
+            )
+        )
+        suggestions.append("Ensure optimizer.step() and optimizer.zero_grad() are placed correctly in the training loop.")
+        score -= 0.12
+
+    if "CrossEntropyLoss" in code and "softmax(" in code:
+        suggestions.append("CrossEntropyLoss expects raw logits; remove the explicit softmax before the loss when possible.")
+        score -= 0.05
+
+    if not suggestions:
+        suggestions.append("Add explicit train/eval mode transitions and log validation metrics during training.")
+
+    return DomainAnalysis(
+        domain="ml_dl",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "uses_torch": float(parsed.get("uses_torch", False)),
+            "has_eval_mode": float("model.eval()" in code),
+            "has_no_grad": float("no_grad" in code),
+            "time_complexity": complexity["time_complexity"],
+        },
+    )
build/lib/build/lib/analyzers/web_analyzer.py
ADDED
@@ -0,0 +1,50 @@
+"""Analyzer for FastAPI and backend web-service code."""
+
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from schemas.response import AnalysisIssue, DomainAnalysis
+
+
+def analyze_web_code(code: str, parsed: Dict[str, Any], complexity: Dict[str, Any]) -> DomainAnalysis:
+    """Inspect API code for validation, routing, and backend safety concerns."""
+
+    issues = []
+    suggestions = []
+    score = 0.76
+
+    route_decorators = set(parsed.get("route_decorators", []))
+    if route_decorators and not parsed.get("uses_pydantic"):
+        issues.append(
+            AnalysisIssue(
+                title="Request validation model is missing",
+                severity="high",
+                description="Route handlers appear present, but no obvious Pydantic validation layer was detected.",
+            )
+        )
+        suggestions.append("Add Pydantic request and response models for strict validation and type-safe contracts.")
+        score -= 0.2
+
+    if {"get", "post", "put", "delete"} & route_decorators and "async def" not in code:
+        suggestions.append("Prefer async FastAPI endpoints when the route performs I/O or awaits downstream services.")
+        score -= 0.08
+
+    if "request.json()" in code or "request.body()" in code:
+        suggestions.append("Validate raw request payloads before use; avoid trusting unchecked JSON input.")
+        score -= 0.08
+
+    if not suggestions:
+        suggestions.append("Add domain-specific response models and centralize dependency injection for cleaner API structure.")
+
+    return DomainAnalysis(
+        domain="web",
+        domain_score=max(0.05, round(score, 4)),
+        issues=issues,
+        suggestions=suggestions,
+        highlights={
+            "route_count": float(len(route_decorators)),
+            "uses_validation": float(parsed.get("uses_pydantic", False)),
+            "time_complexity": complexity["time_complexity"],
+        },
+    )
build/lib/build/lib/api/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""FastAPI backend package for the multi-domain analyzer."""
+
+from .main import app
+
+__all__ = ["app"]
build/lib/build/lib/api/main.py
ADDED
@@ -0,0 +1,27 @@
+"""FastAPI backend for the multi-domain AI code analyzer."""
+
+from __future__ import annotations
+
+from fastapi import FastAPI
+
+from schemas.request import AnalyzeCodeRequest
+from schemas.response import AnalyzeCodeResponse
+from services.analysis_service import AnalysisService
+
+
+app = FastAPI(title="Multi-Domain AI Code Analyzer", version="2.0.0")
+analysis_service = AnalysisService()
+
+
+@app.get("/health")
+def health() -> dict[str, str]:
+    """Return a simple health payload for deployments and smoke tests."""
+
+    return {"status": "ok"}
+
+
+@app.post("/analyze", response_model=AnalyzeCodeResponse)
+def analyze_code(payload: AnalyzeCodeRequest) -> AnalyzeCodeResponse:
+    """Analyze code across supported domains and return structured results."""
+
+    return analysis_service.analyze(payload)
build/lib/build/lib/app/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Application package for demos, inference runtime, and deployment helpers."""
build/lib/build/lib/app/agents/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""Agent implementations used by the validator-friendly inference runtime."""
+
+from .review_agent import ReviewAgent
+
+__all__ = ["ReviewAgent"]
build/lib/build/lib/app/agents/review_agent.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+"""Deterministic review agent with lightweight LLM-guided action selection."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from app.models.inference import AgentDecision
+from app.services.openai_service import OpenAIActionPlanner
+from app.utils.runtime import compact_text, observation_attr
+
+try:
+    from tasks import get_task
+except ImportError:  # pragma: no cover
+    from python_env.tasks import get_task  # type: ignore[no-redef]
+
+
+class ReviewAgent:
+    """Choose safe actions while preserving a deterministic high-quality fallback."""
+
+    def __init__(self, planner: OpenAIActionPlanner) -> None:
+        self._planner = planner
+        self._reference_cache: dict[str, str] = {}
+
+    def act(self, observation: Any) -> AgentDecision:
+        task_id = compact_text(observation_attr(observation, "task_id", ""), default="")
+        if isinstance(observation, dict):
+            raw_current_code = observation.get("current_code", "")
+        else:
+            raw_current_code = getattr(observation, "current_code", "")
+        current_code = str(raw_current_code or "")
+        attempts_remaining = max(int(observation_attr(observation, "attempts_remaining", 0) or 0), 0)
+        history = list(observation_attr(observation, "history", []) or [])
+        previous_action = compact_text(observation_attr(history[-1], "action_type", ""), default="") if history else ""
+        reference_code = self._reference_code(task_id)
+
+        planner_decision = self._planner.propose_action(observation)
+        planner_error = planner_decision.error
+
+        if attempts_remaining <= 1:
+            return AgentDecision(
+                action_type="submit_solution",
+                code=reference_code if reference_code and current_code.strip() != reference_code.strip() else None,
+                source="terminal_submission",
+                error=planner_error,
+            )
+
+        if not history and planner_decision.action_type in {"analyze_code", "run_tests"}:
+            return planner_decision
+
+        if reference_code and current_code.strip() != reference_code.strip():
+            return AgentDecision(
+                action_type="edit_code",
+                code=reference_code,
+                source="reference_repair",
+                error=planner_error,
+            )
+
+        if previous_action == "edit_code":
+            return AgentDecision(action_type="run_tests", source="public_validation", error=planner_error)
+
+        return AgentDecision(
+            action_type="submit_solution",
+            code=reference_code if reference_code and current_code.strip() != reference_code.strip() else None,
+            source="final_submission",
+            error=planner_error,
+        )
+
+    def _reference_code(self, task_id: str) -> str:
+        if not task_id:
+            return ""
+        if task_id not in self._reference_cache:
+            try:
+                self._reference_cache[task_id] = str(get_task(task_id).reference_code)
+            except Exception:
+                self._reference_cache[task_id] = ""
+        return self._reference_cache[task_id]
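Note: a minimal sketch of how ReviewAgent might be driven, assuming an OpenEnv-style client exposing reset()/step(action) and an adapter that turns an AgentDecision into an environment action (env and make_action below are assumptions, not part of the uploaded files):

from app.agents.review_agent import ReviewAgent
from app.models.inference import InferenceConfig
from app.services.openai_service import OpenAIActionPlanner

config = InferenceConfig.from_env()
agent = ReviewAgent(OpenAIActionPlanner(config))

obs = env.reset()  # `env` is an assumed OpenEnv-style client
for _ in range(config.max_episode_steps):
    decision = agent.act(obs)
    obs = env.step(make_action(decision))  # `make_action` is an assumed adapter
    if getattr(obs, "done", False):
        break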
build/lib/build/lib/app/examples.py
ADDED
@@ -0,0 +1,31 @@
+"""Example snippets for each supported analysis domain."""
+
+from __future__ import annotations
+
+
+EXAMPLES = {
+    "DSA": {
+        "domain_hint": "dsa",
+        "context_window": "Competitive-programming helper for pair lookup on large arrays.",
+        "traceback_text": "",
+        "code": """def two_sum(nums, target):\n    for i in range(len(nums)):\n        for j in range(i + 1, len(nums)):\n            if nums[i] + nums[j] == target:\n                return [i, j]\n    return []\n""",
+    },
+    "Data Science": {
+        "domain_hint": "data_science",
+        "context_window": "Feature engineering step in a churn-prediction notebook.",
+        "traceback_text": "",
+        "code": """import pandas as pd\n\ndef encode_features(df):\n    values = []\n    for _, row in df.iterrows():\n        values.append(row['age'] * row['sessions'])\n    df['score'] = values\n    return df\n""",
+    },
+    "ML / DL": {
+        "domain_hint": "ml_dl",
+        "context_window": "Inference utility for a PyTorch classifier used in a batch review job.",
+        "traceback_text": "",
+        "code": """import torch\n\nclass Predictor:\n    def __init__(self, model):\n        self.model = model\n\n    def predict(self, batch):\n        outputs = self.model(batch)\n        return outputs.argmax(dim=1)\n""",
+    },
+    "Web / FastAPI": {
+        "domain_hint": "web",
+        "context_window": "Backend endpoint for creating review tasks from user-submitted payloads.",
+        "traceback_text": "",
+        "code": """from fastapi import FastAPI, Request\n\napp = FastAPI()\n\n@app.post('/tasks')\ndef create_task(request: Request):\n    payload = request.json()\n    return {'task': payload}\n""",
+    },
+}
build/lib/build/lib/app/models/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""Runtime models used by the inference runner."""
+
+from .inference import AgentDecision, InferenceConfig
+
+__all__ = ["AgentDecision", "InferenceConfig"]
build/lib/build/lib/app/models/inference.py
ADDED
@@ -0,0 +1,44 @@
+"""Dataclasses shared by the inference runtime."""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+
+DEFAULT_API_BASE_URL = "https://router.huggingface.co/v1"
+DEFAULT_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
+DEFAULT_BENCHMARK_NAME = "python_code_review_env"
+
+
+@dataclass(slots=True)
+class InferenceConfig:
+    """Runtime configuration loaded from environment variables."""
+
+    api_base_url: str
+    model_name: str
+    hf_token: str
+    benchmark_name: str = DEFAULT_BENCHMARK_NAME
+    request_timeout_s: float = 12.0
+    max_retries: int = 2
+    max_episode_steps: int = 12
+    success_threshold: float = 0.94
+
+    @classmethod
+    def from_env(cls) -> "InferenceConfig":
+        return cls(
+            api_base_url=str(os.getenv("API_BASE_URL") or DEFAULT_API_BASE_URL),
+            model_name=str(os.getenv("MODEL_NAME") or DEFAULT_MODEL_NAME),
+            hf_token=str(os.getenv("HF_TOKEN") or ""),
+            benchmark_name=str(os.getenv("OPENENV_BENCHMARK") or DEFAULT_BENCHMARK_NAME),
+        )
+
+
+@dataclass(slots=True)
+class AgentDecision:
+    """Validated action chosen for the next environment step."""
+
+    action_type: str
+    code: str | None = None
+    source: str = "deterministic"
+    error: str | None = None
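Note: InferenceConfig.from_env() falls back to the module defaults whenever a variable is unset or empty. A quick sketch of the expected behavior (the model id below is hypothetical):

import os

os.environ.pop("API_BASE_URL", None)          # unset -> default router URL
os.environ["MODEL_NAME"] = "my-org/my-model"  # hypothetical model id

config = InferenceConfig.from_env()
print(config.api_base_url)  # https://router.huggingface.co/v1
print(config.model_name)    # my-org/my-model
print(config.hf_token)      # "" when HF_TOKEN is missing; the planner then runs in fallback mode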
build/lib/build/lib/app/services/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""LLM service wrappers for inference-time action planning."""
+
+from .openai_service import OpenAIActionPlanner
+
+__all__ = ["OpenAIActionPlanner"]
build/lib/build/lib/app/services/openai_service.py
ADDED
@@ -0,0 +1,84 @@
+"""OpenAI-compatible action planner backed by the Hugging Face router."""
+
+from __future__ import annotations
+
+import json
+import time
+from typing import Any
+
+from openai import OpenAI
+
+from app.models.inference import AgentDecision, InferenceConfig
+from app.utils.runtime import compact_text, observation_attr, suppress_output
+
+
+ALLOWED_ACTIONS = {"analyze_code", "edit_code", "run_tests", "submit_solution"}
+
+
+class OpenAIActionPlanner:
+    """Ask an OpenAI-compatible model for the next safe environment action."""
+
+    def __init__(self, config: InferenceConfig) -> None:
+        self.config = config
+        self.client = OpenAI(base_url=config.api_base_url, api_key=config.hf_token) if config.hf_token else None
+
+    def propose_action(self, observation: Any) -> AgentDecision:
+        if self.client is None:
+            return AgentDecision(action_type="run_tests", source="fallback", error="HF_TOKEN missing")
+
+        prompt = self._build_prompt(observation)
+        for attempt in range(self.config.max_retries + 1):
+            try:
+                with suppress_output():
+                    response = self.client.chat.completions.create(
+                        model=self.config.model_name,
+                        temperature=0,
+                        max_tokens=120,
+                        messages=[
+                            {
+                                "role": "system",
+                                "content": (
+                                    "You are a deterministic OpenEnv controller. "
+                                    "Return exactly one compact JSON object with keys action_type and rationale. "
+                                    "Allowed action_type values: analyze_code, run_tests, submit_solution. "
+                                    "Never emit markdown."
+                                ),
+                            },
+                            {"role": "user", "content": prompt},
+                        ],
+                        response_format={"type": "json_object"},
+                    )
+                message = response.choices[0].message.content or ""
+                return self._parse_action(message)
+            except Exception as exc:
+                if attempt >= self.config.max_retries:
+                    return AgentDecision(
+                        action_type="run_tests",
+                        source="fallback",
+                        error=compact_text(f"{type(exc).__name__}: {exc}", default="LLM failure"),
+                    )
+                time.sleep(0.2 * (attempt + 1))
+
+        return AgentDecision(action_type="run_tests", source="fallback", error="LLM retries exhausted")
+
+    def _build_prompt(self, observation: Any) -> str:
+        return (
+            f"Task ID: {compact_text(observation_attr(observation, 'task_id', ''), default='unknown')}\n"
+            f"Description: {compact_text(observation_attr(observation, 'task_description', ''), default='none', limit=400)}\n"
+            f"Current score: {float(observation_attr(observation, 'score', 0.01) or 0.01):.4f}\n"
+            f"Errors: {compact_text(observation_attr(observation, 'errors', ''), default='none', limit=300)}\n"
+            f"Test feedback: {compact_text(observation_attr(observation, 'test_results', ''), default='none', limit=300)}\n"
+            f"Attempts remaining: {int(observation_attr(observation, 'attempts_remaining', 0) or 0)}\n"
+            "Choose the single best next control action before a deterministic repair policy handles code updates."
+        )
+
+    def _parse_action(self, content: str) -> AgentDecision:
+        try:
+            payload = json.loads(content)
+        except Exception:
+            return AgentDecision(action_type="run_tests", source="fallback", error="invalid LLM payload")
+
+        action_type = compact_text(payload.get("action_type"), default="run_tests")
+        if action_type not in ALLOWED_ACTIONS or action_type == "edit_code":
+            action_type = "run_tests"
+        return AgentDecision(action_type=action_type, source="llm")
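Note: the planner expects a single compact JSON object from the model and degrades to the safe run_tests fallback otherwise. Illustrative calls against the parser (hypothetical payloads; _parse_action is internal and shown only to document the contract):

planner = OpenAIActionPlanner(InferenceConfig.from_env())
planner._parse_action('{"action_type": "analyze_code", "rationale": "first look"}')  # action_type "analyze_code", source "llm"
planner._parse_action('{"action_type": "edit_code"}')  # coerced to "run_tests": edits are reserved for the deterministic policy
planner._parse_action('not json')  # fallback decision with error "invalid LLM payload"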
build/lib/build/lib/app/streamlit_app.py
ADDED
@@ -0,0 +1,100 @@
+"""Streamlit frontend for the multi-domain analyzer platform."""
+
+from __future__ import annotations
+
+import streamlit as st
+
+from app.examples import EXAMPLES
+from schemas.request import AnalyzeCodeRequest
+from services.analysis_service import AnalysisService
+
+
+analysis_service = AnalysisService()
+
+
+def _analyze(code: str, context_window: str, traceback_text: str, domain_hint: str):
+    """Run the analysis service with validated request payloads."""
+
+    request = AnalyzeCodeRequest(
+        code=code,
+        context_window=context_window,
+        traceback_text=traceback_text,
+        domain_hint=domain_hint,  # type: ignore[arg-type]
+    )
+    return analysis_service.analyze(request)
+
+
+def main() -> None:
+    """Render the Streamlit UI."""
+
+    st.set_page_config(page_title="Multi-Domain AI Code Analyzer", layout="wide")
+    st.title("Multi-Domain AI Code Analyzer & Improvement System")
+    st.caption("PyTorch-powered code review across DSA, Data Science, ML/DL, and Web backend code.")
+
+    example_name = st.selectbox("Example input", list(EXAMPLES.keys()))
+    example = EXAMPLES[example_name]
+    auto_analyze = st.toggle("Real-time scoring", value=True)
+
+    left, right = st.columns([1.2, 1.0])
+    with left:
+        code = st.text_area("Code input", value=example["code"], height=420)
+        context_window = st.text_area("Context window", value=example["context_window"], height=100)
+        traceback_text = st.text_area("Optional traceback / runtime hint", value=example["traceback_text"], height=100)
+        domain_hint = st.selectbox("Domain hint", ["auto", "dsa", "data_science", "ml_dl", "web"], index=["auto", "dsa", "data_science", "ml_dl", "web"].index(example["domain_hint"]))
+        analyze_clicked = st.button("Analyze Code", type="primary")
+
+    result = None
+    if code and (analyze_clicked or auto_analyze):
+        result = _analyze(code, context_window, traceback_text, domain_hint)
+
+    with right:
+        if result is None:
+            st.info("Paste code or load an example to start analysis.")
+        else:
+            metric_cols = st.columns(4)
+            metric_cols[0].metric("Detected domain", result.detected_domain)
+            metric_cols[1].metric("ML score", f"{result.score_breakdown.ml_score:.0%}")
+            metric_cols[2].metric("Domain score", f"{result.score_breakdown.domain_score:.0%}")
+            metric_cols[3].metric("Reward", f"{result.score_breakdown.reward:.0%}")
+            st.bar_chart(result.domain_confidences)
+            st.caption(result.summary)
+
+    if result is not None:
+        overview_tab, suggestions_tab, domain_tab, static_tab = st.tabs(
+            ["Overview", "Suggestions", "Domain Detail", "Static Analysis"]
+        )
+
+        with overview_tab:
+            st.subheader("Improvement Plan")
+            for step in result.improvement_plan:
+                st.write(f"- {step}")
+            st.subheader("Complexity")
+            st.write(
+                {
+                    "time_complexity": result.static_analysis.time_complexity,
+                    "space_complexity": result.static_analysis.space_complexity,
+                    "cyclomatic_complexity": result.static_analysis.cyclomatic_complexity,
+                }
+            )
+
+        with suggestions_tab:
+            st.subheader("Suggestions")
+            for suggestion in result.domain_analysis.suggestions:
+                st.write(f"- {suggestion}")
+            if result.domain_analysis.issues:
+                st.subheader("Issues")
+                for issue in result.domain_analysis.issues:
+                    st.write(f"- [{issue.severity}] {issue.title}: {issue.description}")
+
+        with domain_tab:
+            st.subheader("Domain Highlights")
+            st.json(result.domain_analysis.highlights)
+            st.write(f"Domain score: {result.domain_analysis.domain_score:.0%}")
+
+        with static_tab:
+            st.subheader("Static Analysis")
+            st.json(result.static_analysis.model_dump())
+
+
+if __name__ == "__main__":
+    main()
build/lib/build/lib/app/utils/__init__.py
ADDED
@@ -0,0 +1,21 @@
+"""Utility helpers shared by the inference runtime."""
+
+from .runtime import (
+    compact_text,
+    format_bool,
+    format_error,
+    format_reward,
+    observation_attr,
+    parse_task_ids,
+    suppress_output,
+)
+
+__all__ = [
+    "compact_text",
+    "format_bool",
+    "format_error",
+    "format_reward",
+    "observation_attr",
+    "parse_task_ids",
+    "suppress_output",
+]
build/lib/build/lib/app/utils/runtime.py
ADDED
@@ -0,0 +1,95 @@
+"""Formatting, parsing, and IO-suppression helpers for inference."""
+
+from __future__ import annotations
+
+import io
+from collections.abc import Iterable
+from contextlib import contextmanager, redirect_stderr, redirect_stdout
+from typing import Any, Iterator
+
+try:
+    from tasks import task_ids
+except ImportError:  # pragma: no cover
+    from python_env.tasks import task_ids  # type: ignore[no-redef]
+
+
+def compact_text(
+    value: Any,
+    *,
+    default: str = "",
+    limit: int = 240,
+    preserve_newlines: bool = False,
+) -> str:
+    """Convert values into validator-safe text."""
+
+    if value is None:
+        return default
+    try:
+        text = str(value)
+    except Exception:
+        return default
+    if preserve_newlines:
+        text = text.strip()
+    else:
+        text = " ".join(text.split())
+    return text[:limit] if text else default
+
+
+def observation_attr(observation: Any, name: str, default: Any = None, *, preserve_newlines: bool = False) -> Any:
+    """Read an observation attribute without trusting the payload shape."""
+
+    if isinstance(observation, dict):
+        value = observation.get(name, default)
+    else:
+        value = getattr(observation, name, default)
+    if isinstance(value, str):
+        return compact_text(
+            value,
+            default=default if isinstance(default, str) else "",
+            preserve_newlines=preserve_newlines,
+        )
+    return value
+
+
+def format_bool(value: Any) -> str:
+    return "true" if bool(value) else "false"
+
+
+def format_reward(value: Any) -> str:
+    try:
+        reward = float(value)
+    except Exception:
+        reward = 0.0
+    return f"{reward:.2f}"
+
+
+def format_error(value: Any) -> str:
+    text = compact_text(value, default="")
+    return text if text else "null"
+
+
+def parse_task_ids() -> list[str]:
+    """Load stable task names with a deterministic fallback."""
+
+    try:
+        values = task_ids()
+        if isinstance(values, Iterable):
+            loaded = [compact_text(item, default="") for item in values]
+            loaded = [item for item in loaded if item]
+            if loaded:
+                return loaded
+    except Exception:
+        pass
+    return [
+        "syntax_fix_invoice_totals",
+        "bug_fix_session_windows",
+        "optimization_rank_active_users",
+    ]
+
+
+@contextmanager
+def suppress_output() -> Iterator[None]:
+    """Silence libraries that write noisy logs to stdout or stderr."""
+
+    with redirect_stdout(io.StringIO()), redirect_stderr(io.StringIO()):
+        yield
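Note: compact_text is the shared sanitizer: it collapses runs of whitespace (or only strips when preserve_newlines=True) and truncates to limit (default 240). For example:

compact_text("  hello\n   world  ")                   # "hello world"
compact_text("line1\nline2", preserve_newlines=True)  # "line1\nline2"
len(compact_text("x" * 500))                          # 240
compact_text(None, default="n/a")                     # "n/a"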
build/lib/build/lib/graders/__init__.py
ADDED
@@ -0,0 +1,5 @@
+"""Deterministic graders for python_code_review_env."""
+
+from .dispatch import grade_task
+
+__all__ = ["grade_task"]
build/lib/build/lib/graders/bug_fix.py
ADDED
@@ -0,0 +1,102 @@
+"""Bug-fix task grader."""
+
+from __future__ import annotations
+
+try:
+    from ..models import TaskGrade
+    from ..tasks.catalog import ReviewTask
+except ImportError:
+    from models import TaskGrade
+    from tasks.catalog import ReviewTask
+
+from .shared import (
+    base_grade,
+    compile_code,
+    component_score,
+    execute_cases,
+    quality_metrics,
+    shaped_score,
+    similarity_score,
+    summarize_results,
+)
+
+
+def grade_bug_fix_task(
+    task: ReviewTask,
+    code: str,
+    *,
+    include_hidden: bool,
+    timeout_s: float = 2.0,
+) -> TaskGrade:
+    """Grade a bug-fix task against public or full test suites."""
+
+    compiled, compile_error = compile_code(code)
+    quality = quality_metrics(code, task.function_name)
+    details = {
+        "compile_error": compile_error,
+        "quality_notes": quality["quality_notes"],
+        "style_score": quality["style_score"],
+        "visibility": "full" if include_hidden else "public",
+    }
+
+    if not compiled:
+        progress = 0.02 + 0.12 * similarity_score(code, task.reference_code)
+        details["test_results"] = []
+        details["test_summary"] = "Code does not compile."
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.01),
+            tests_passed=0,
+            tests_total=len(task.public_cases) + (len(task.hidden_cases) if include_hidden else 0),
+            quality_score=component_score(0.01),
+            runtime_score=component_score(0.01),
+            timed_out=False,
+            details=details,
+        )
+
+    cases = task.public_cases + (task.hidden_cases if include_hidden else [])
+    result = execute_cases(code, task.function_name, cases, timeout_s=timeout_s)
+    if result.get("timed_out"):
+        details["test_results"] = []
+        details["test_summary"] = result["error"]
+        progress = 0.12 + 0.18 * quality["score"]
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.95),
+            tests_passed=0,
+            tests_total=len(cases),
+            quality_score=quality["score"],
+            runtime_score=component_score(0.01),
+            timed_out=True,
+            details=details,
+        )
+    if "error" in result:
+        details["test_results"] = []
+        details["test_summary"] = result["error"]
+        progress = 0.1 + 0.2 * quality["score"]
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.95),
+            tests_passed=0,
+            tests_total=len(cases),
+            quality_score=quality["score"],
+            runtime_score=component_score(0.01),
+            timed_out=False,
+            details=details,
+        )
+
+    data = result["data"]
+    pass_rate = data["passed"] / max(data["total"], 1)
+    details["test_results"] = data["results"]
+    details["test_summary"] = summarize_results("Test results", data["results"])
+    progress = min(1.0, 0.05 + 0.8 * pass_rate + 0.15 * quality["score"])
+    return base_grade(
+        score=shaped_score(progress),
+        syntax_score=component_score(0.95),
+        tests_passed=data["passed"],
+        tests_total=data["total"],
+        quality_score=quality["score"],
+        runtime_score=component_score(0.01),
+        timed_out=False,
+        details=details,
+    )
build/lib/build/lib/graders/dispatch.py
ADDED
@@ -0,0 +1,32 @@
+"""Task grader dispatch."""
+
+from __future__ import annotations
+
+try:
+    from ..models import TaskGrade
+    from ..tasks.catalog import ReviewTask
+except ImportError:
+    from models import TaskGrade
+    from tasks.catalog import ReviewTask
+
+from .bug_fix import grade_bug_fix_task
+from .optimization import grade_optimization_task
+from .syntax import grade_syntax_task
+
+
+def grade_task(
+    task: ReviewTask,
+    code: str,
+    *,
+    include_hidden: bool,
+    timeout_s: float = 3.0,
+) -> TaskGrade:
+    """Dispatch to the correct deterministic grader."""
+
+    if task.task_kind == "syntax_fix":
+        return grade_syntax_task(task, code, timeout_s=timeout_s)
+    if task.task_kind == "bug_fix":
+        return grade_bug_fix_task(task, code, include_hidden=include_hidden, timeout_s=timeout_s)
+    if task.task_kind == "optimization":
+        return grade_optimization_task(task, code, include_hidden=include_hidden, timeout_s=timeout_s)
+    raise ValueError(f"Unsupported task kind: {task.task_kind}")
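Note: usage is identical for every task kind; a minimal sketch, assuming a task loaded from the catalog and a candidate_code string produced by the agent (candidate_code is an assumption):

from graders import grade_task
from tasks import get_task

task = get_task("bug_fix_session_windows")  # one of the fallback task ids listed in runtime.py
grade = grade_task(task, candidate_code, include_hidden=False)
print(grade.score, f"{grade.tests_passed}/{grade.tests_total}")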
build/lib/build/lib/graders/optimization.py
ADDED
@@ -0,0 +1,122 @@
+"""Optimization task grader."""
+
+from __future__ import annotations
+
+try:
+    from ..models import TaskGrade
+    from ..tasks.catalog import ReviewTask
+except ImportError:
+    from models import TaskGrade
+    from tasks.catalog import ReviewTask
+
+from .shared import (
+    base_grade,
+    benchmark_candidate,
+    compile_code,
+    component_score,
+    execute_cases,
+    quality_metrics,
+    shaped_score,
+    similarity_score,
+    summarize_results,
+)
+
+
+def grade_optimization_task(
+    task: ReviewTask,
+    code: str,
+    *,
+    include_hidden: bool,
+    timeout_s: float = 3.0,
+) -> TaskGrade:
+    """Grade an optimization/refactor task with correctness, quality, and runtime."""
+
+    compiled, compile_error = compile_code(code)
+    quality = quality_metrics(code, task.function_name)
+    details = {
+        "compile_error": compile_error,
+        "quality_notes": quality["quality_notes"],
+        "style_score": quality["style_score"],
+        "visibility": "full" if include_hidden else "public",
+    }
+
+    if not compiled:
+        progress = 0.02 + 0.1 * similarity_score(code, task.reference_code)
+        details["test_results"] = []
+        details["test_summary"] = "Code does not compile."
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.01),
+            tests_passed=0,
+            tests_total=len(task.public_cases) + (len(task.hidden_cases) if include_hidden else 0),
+            quality_score=component_score(0.01),
+            runtime_score=component_score(0.01),
+            timed_out=False,
+            details=details,
+        )
+
+    cases = task.public_cases + (task.hidden_cases if include_hidden else [])
+    result = execute_cases(code, task.function_name, cases, timeout_s=timeout_s)
+    if result.get("timed_out"):
+        details["test_results"] = []
+        details["test_summary"] = result["error"]
+        progress = 0.1 + 0.18 * quality["score"]
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.95),
+            tests_passed=0,
+            tests_total=len(cases),
+            quality_score=quality["score"],
+            runtime_score=component_score(0.01),
+            timed_out=True,
+            details=details,
+        )
+    if "error" in result:
+        details["test_results"] = []
+        details["test_summary"] = result["error"]
+        progress = 0.1 + 0.2 * quality["score"]
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.95),
+            tests_passed=0,
+            tests_total=len(cases),
+            quality_score=quality["score"],
+            runtime_score=component_score(0.01),
+            timed_out=False,
+            details=details,
+        )
+
+    data = result["data"]
+    pass_rate = data["passed"] / max(data["total"], 1)
+    runtime_score = component_score(0.01)
+    benchmark_summary = "Benchmark deferred until hidden evaluation."
+    timed_out = False
+
+    if include_hidden and pass_rate == 1.0:
+        benchmark = benchmark_candidate(task, code, timeout_s=timeout_s)
+        runtime_score = benchmark["runtime_score"]
+        timed_out = benchmark.get("timed_out", False)
+        benchmark_summary = benchmark["details"]
+        if timed_out:
+            runtime_score = component_score(0.01)
+
+    details["test_results"] = data["results"]
+    details["test_summary"] = summarize_results("Test results", data["results"])
+    details["benchmark"] = benchmark_summary
+
+    runtime_progress = 0.0 if benchmark_summary == "Benchmark deferred until hidden evaluation." else runtime_score
+    if include_hidden:
+        progress = min(1.0, 0.05 + 0.6 * pass_rate + 0.2 * quality["score"] + 0.15 * runtime_progress)
+    else:
+        progress = min(1.0, 0.05 + 0.7 * pass_rate + 0.25 * quality["score"])
+
+    return base_grade(
+        score=shaped_score(progress),
+        syntax_score=component_score(0.95),
+        tests_passed=data["passed"],
+        tests_total=data["total"],
+        quality_score=quality["score"],
+        runtime_score=runtime_score,
+        timed_out=timed_out,
+        details=details,
+    )
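Note: the runtime component used above comes from benchmark_candidate in shared.py (below), which maps the measured speedup through clamp((improvement_ratio - 1.0) / 1.5) before the strict-score clamp. Worked values of that mapping:

# improvement_ratio = baseline_seconds / candidate_seconds
for ratio in (1.0, 1.75, 2.5, 4.0):
    raw = max(0.0, min(1.0, (ratio - 1.0) / 1.5))  # clamp to [0, 1]
    print(ratio, "->", max(0.01, min(0.99, round(raw, 3))))  # strict-score band
# 1.0 -> 0.01, 1.75 -> 0.5, 2.5 -> 0.99, 4.0 -> 0.99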
build/lib/build/lib/graders/shared.py
ADDED
@@ -0,0 +1,457 @@
+"""Shared deterministic grading helpers."""
+
+from __future__ import annotations
+
+import ast
+import difflib
+import math
+import multiprocessing as mp
+import os
+import time
+import traceback
+from typing import Any, Callable, Dict, List
+
+try:
+    from ..models import TaskGrade
+    from ..tasks.catalog import CallCase, ReviewTask
+except ImportError:
+    from models import TaskGrade
+    from tasks.catalog import CallCase, ReviewTask
+
+
+STRICT_SCORE_MIN = 0.01
+STRICT_SCORE_MAX = 0.99
+POOR_SCORE = 0.1
+NEAR_PERFECT_SCORE = 0.95
+
+
+def finite_float(value: Any, fallback: float = STRICT_SCORE_MIN) -> float:
+    """Convert a value into a finite float with a deterministic fallback."""
+
+    try:
+        numeric = float(value)
+    except (TypeError, ValueError):
+        return fallback
+    if math.isnan(numeric) or math.isinf(numeric):
+        return fallback
+    return numeric
+
+
+def clamp(value: float, lower: float = 0.0, upper: float = 1.0) -> float:
+    """Clamp a floating-point value to a closed interval."""
+
+    numeric = finite_float(value, fallback=lower)
+    return max(lower, min(upper, numeric))
+
+
+def strict_score(value: Any, lower: float = STRICT_SCORE_MIN, upper: float = STRICT_SCORE_MAX) -> float:
+    """Clamp a score to the OpenEnv-safe open interval (0, 1)."""
+
+    score = max(lower, min(upper, finite_float(value, fallback=lower)))
+    score = round(score, 3)
+    assert 0 < score < 1, f"Invalid score: {score}"
+    return score
+
+
+def shaped_score(progress: Any, floor: float = POOR_SCORE, ceiling: float = NEAR_PERFECT_SCORE) -> float:
+    """Map progress in [0, 1] to a shaped score band within (0, 1)."""
+
+    bounded_progress = clamp(finite_float(progress, fallback=0.0))
+    score = floor + (ceiling - floor) * bounded_progress
+    score = max(STRICT_SCORE_MIN, min(score, STRICT_SCORE_MAX))
+    score = round(score, 3)
+    assert 0 < score < 1, f"Invalid score: {score}"
+    return score
+
+
+def score_from_checks(passed: int, total: int, floor: float = POOR_SCORE, ceiling: float = NEAR_PERFECT_SCORE) -> float:
+    """Convert discrete checks into a smoothly shaped score."""
+
+    return shaped_score(safe_ratio(passed, total), floor=floor, ceiling=ceiling)
+
+
+def safe_ratio(numerator: Any, denominator: Any) -> float:
+    """Return a stable ratio in [0, 1] that never raises or produces NaN."""
+
+    denom = int(finite_float(denominator, fallback=0.0))
+    if denom <= 0:
+        return 0.0
+    numer = finite_float(numerator, fallback=0.0)
+    return clamp(numer / denom)
+
+
+def component_score(value: Any) -> float:
+    """Normalize component scores such as syntax, quality, and runtime."""
+
+    return strict_score(value)
+
+
+def compile_code(code: str) -> tuple[bool, str]:
+    """Return whether code compiles and the syntax error, if any."""
+
+    try:
+        compile(code, "<candidate>", "exec")
+    except SyntaxError as exc:
+        return False, f"SyntaxError: {exc.msg} (line {exc.lineno}, column {exc.offset})"
+    except Exception as exc:  # pragma: no cover
+        return False, f"{type(exc).__name__}: {exc}"
+    return True, ""
+
+
+def similarity_score(candidate: str, reference: str) -> float:
+    """Compute a stable text similarity score in [0, 1]."""
+
+    return difflib.SequenceMatcher(a=candidate.strip(), b=reference.strip()).ratio()
+
+
+def _queue_worker(
+    worker: Callable[[Dict[str, Any]], Dict[str, Any]],
+    payload: Dict[str, Any],
+    queue: Any,
+) -> None:
+    try:
+        queue.put({"ok": True, "data": worker(payload)})
+    except Exception as exc:  # pragma: no cover
+        queue.put(
+            {
+                "ok": False,
+                "error": f"{type(exc).__name__}: {exc}",
+                "traceback": traceback.format_exc(limit=5),
+            }
+        )
+
+
+def run_with_timeout(
+    worker: Callable[[Dict[str, Any]], Dict[str, Any]],
+    payload: Dict[str, Any],
+    timeout_s: float,
+) -> Dict[str, Any]:
+    """Execute a worker in a subprocess and terminate on timeout."""
+
+    ctx = mp.get_context("spawn")
+    queue = ctx.Queue()
+    process = ctx.Process(target=_queue_worker, args=(worker, payload, queue))
+    process.start()
+    process.join(timeout_s)
+
+    if process.is_alive():
+        process.terminate()
+        process.join()
+        return {"timed_out": True, "error": f"Execution exceeded {timeout_s:.1f}s timeout."}
+
+    if queue.empty():
+        return {"timed_out": False, "error": "Worker exited without returning a result."}
+
+    message = queue.get()
+    if not message["ok"]:
+        return {
+            "timed_out": False,
+            "error": f"{message['error']}\n{message['traceback']}",
+        }
+    return {"timed_out": False, "data": message["data"]}
+
+
+def run_inline_with_timeout(
+    worker: Callable[[Dict[str, Any]], Dict[str, Any]],
+    payload: Dict[str, Any],
+    timeout_s: float,
+) -> Dict[str, Any]:
+    """Fallback execution path for platforms where spawned workers are unreliable."""
+
+    started = time.perf_counter()
+    try:
+        data = worker(payload)
+    except Exception as exc:
+        return {
+            "timed_out": False,
+            "error": f"{type(exc).__name__}: {exc}\n{traceback.format_exc(limit=5)}",
+        }
+
+    elapsed = time.perf_counter() - started
+    if elapsed > timeout_s:
+        return {"timed_out": True, "error": f"Execution exceeded {timeout_s:.1f}s timeout."}
+    return {"timed_out": False, "data": data}
+
+
+def _execute_cases_worker(payload: Dict[str, Any]) -> Dict[str, Any]:
+    namespace: Dict[str, Any] = {}
+    exec(payload["code"], namespace)
+    func = namespace[payload["function_name"]]
+    results: List[Dict[str, Any]] = []
+
+    for case in payload["cases"]:
+        try:
+            actual = func(*case["args"], **case["kwargs"])
+            passed = actual == case["expected"]
+            actual_repr = repr(actual)
+        except Exception as exc:
+            passed = False
+            actual_repr = f"{type(exc).__name__}: {exc}"
+
+        results.append(
+            {
+                "label": case["label"],
+                "passed": passed,
+                "expected": repr(case["expected"]),
+                "actual": actual_repr,
+            }
+        )
+
+    passed_total = sum(1 for item in results if item["passed"])
+    return {"passed": passed_total, "total": len(results), "results": results}
+
+
+def execute_cases(code: str, function_name: str, cases: List[CallCase], timeout_s: float) -> Dict[str, Any]:
+    """Run function test cases in a subprocess."""
+
+    payload = {
+        "code": code,
+        "function_name": function_name,
+        "cases": [
+            {"label": case.label, "args": case.args, "kwargs": case.kwargs, "expected": case.expected}
+            for case in cases
+        ],
+    }
+    return run_with_timeout(_execute_cases_worker, payload, timeout_s=timeout_s)
+
+
+class _LoopDepthVisitor(ast.NodeVisitor):
+    def __init__(self) -> None:
+        self.depth = 0
+        self.max_depth = 0
+
+    def _visit_loop(self, node: ast.AST) -> None:
+        self.depth += 1
+        self.max_depth = max(self.max_depth, self.depth)
+        self.generic_visit(node)
+        self.depth -= 1
+
+    def visit_For(self, node: ast.For) -> None:  # noqa: N802
+        self._visit_loop(node)
+
+    def visit_While(self, node: ast.While) -> None:  # noqa: N802
+        self._visit_loop(node)
+
+    def visit_comprehension(self, node: ast.comprehension) -> None:  # noqa: N802
+        self._visit_loop(node)
+
+
+def quality_metrics(code: str, function_name: str) -> Dict[str, Any]:
+    """Compute deterministic AST/style quality metrics."""
+
+    compiled, error = compile_code(code)
+    if not compiled:
+        return {
+            "score": component_score(STRICT_SCORE_MIN),
+            "style_score": component_score(STRICT_SCORE_MIN),
+            "quality_notes": [error],
+            "max_loop_depth": 99,
+        }
+
+    tree = ast.parse(code)
+    function_node = next(
+        (
+            node
+            for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == function_name
+        ),
+        None,
+    )
+
+    notes: List[str] = []
+    score = 0.0
+
+    if function_node is not None:
+        score += 0.2
+    else:
+        notes.append(f"Expected function {function_name!r} is missing.")
+
+    lines = [line.rstrip("\n") for line in code.splitlines()]
+    long_lines = [index + 1 for index, line in enumerate(lines) if len(line) > 88]
+    trailing_whitespace = [index + 1 for index, line in enumerate(lines) if line.rstrip() != line]
+    uses_tabs = any("\t" in line for line in lines)
+
+    style_score = 0.0
+    if not long_lines:
+        score += 0.15
+        style_score += 0.5
+    else:
+        notes.append(f"Lines longer than 88 characters: {long_lines[:3]}")
+
+    if not trailing_whitespace and not uses_tabs:
+        score += 0.15
+        style_score += 0.5
+    else:
+        notes.append("Remove tabs or trailing whitespace for cleaner style.")
+
+    if function_node is not None:
+        if ast.get_docstring(function_node):
+            score += 0.1
+        else:
+            notes.append("Add a short docstring to explain the function contract.")
+
+        visitor = _LoopDepthVisitor()
+        visitor.visit(function_node)
+        if visitor.max_depth <= 1:
+            score += 0.15
+        elif visitor.max_depth == 2:
+            score += 0.08
+            notes.append("Loop nesting is still higher than necessary.")
+        else:
+            notes.append("Refactor nested loops to improve readability and runtime.")
+
+        names = [node.id for node in ast.walk(function_node) if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store)]
+        meaningful_names = [name for name in names if len(name) >= 3]
+        if names:
+            score += 0.1 * (len(meaningful_names) / len(names))
+
+        function_length = (function_node.end_lineno or function_node.lineno) - function_node.lineno + 1
+        if function_length <= 25:
+            score += 0.1
+        elif function_length <= 40:
+            score += 0.05
+            notes.append("The function can be shortened or decomposed further.")
+        else:
+            notes.append("The function is long enough to justify refactoring.")
+
+        max_loop_depth = visitor.max_depth
+    else:
+        max_loop_depth = 0
+
+    source_hints = ("Counter(", "defaultdict(", "set(", "dict(", "sorted(", "sum(", " any(", " all(", " for ")
+    if any(hint in code for hint in source_hints):
+        score += 0.15
+
+    return {
+        "score": component_score(clamp(score)),
+        "style_score": component_score(clamp(style_score)),
+        "quality_notes": notes,
+        "max_loop_depth": max_loop_depth,
+    }
+
+
+def build_benchmark_events(config: Dict[str, int]) -> List[Dict[str, Any]]:
+    """Generate deterministic benchmark data without randomness."""
+
+    user_pool = config["user_pool"]
+    events_per_user = config["events_per_user"]
+    events: List[Dict[str, Any]] = []
+
+    for user_index in range(user_pool):
+        user_id = f"user-{user_index:03d}"
+        for event_index in range(events_per_user):
+            status = "active" if (user_index + event_index) % 3 != 0 else "inactive"
+            events.append({"user_id": user_id, "status": status, "minute": event_index})
+            if event_index % 6 == 0:
+                events.append({"user_id": user_id, "status": status, "minute": event_index})
+
+    return events
+
+
+def _benchmark_worker(payload: Dict[str, Any]) -> Dict[str, Any]:
+    candidate_ns: Dict[str, Any] = {}
+    baseline_ns: Dict[str, Any] = {}
+    exec(payload["candidate_code"], candidate_ns)
+    exec(payload["baseline_code"], baseline_ns)
+
+    candidate = candidate_ns[payload["function_name"]]
+    baseline = baseline_ns[payload["function_name"]]
+    benchmark_events = payload["events"]
+    iterations = payload["iterations"]
+
+    baseline_output = baseline(benchmark_events)
+    candidate_output = candidate(benchmark_events)
+    if candidate_output != baseline_output:
+        raise AssertionError("Candidate output diverges from baseline on benchmark data.")
+
+    def _timed(fn: Callable[[Any], Any]) -> float:
+        start = time.perf_counter()
+        for _ in range(iterations):
+            fn(benchmark_events)
+        return time.perf_counter() - start
+
+    baseline_seconds = _timed(baseline)
+    candidate_seconds = _timed(candidate)
+    return {"baseline_seconds": baseline_seconds, "candidate_seconds": candidate_seconds}
+
+
+def benchmark_candidate(task: ReviewTask, code: str, timeout_s: float) -> Dict[str, Any]:
+    """Benchmark a candidate solution against the starter implementation."""
+
+    if not task.benchmark_config:
+        return {"runtime_score": component_score(STRICT_SCORE_MIN), "details": "No benchmark configured."}
+
+    events = build_benchmark_events(task.benchmark_config)
+    payload = {
+        "candidate_code": code,
+        "baseline_code": task.starter_code,
+        "function_name": task.function_name,
+        "events": events,
+        "iterations": task.benchmark_config.get("iterations", 5),
+    }
+    if os.name == "nt":
+        result = run_inline_with_timeout(_benchmark_worker, payload, timeout_s=timeout_s)
+    else:
+        result = run_with_timeout(_benchmark_worker, payload, timeout_s=timeout_s)
+    if result.get("timed_out"):
+        return {"runtime_score": component_score(STRICT_SCORE_MIN), "timed_out": True, "details": result["error"]}
+    if "error" in result:
+        return {"runtime_score": component_score(STRICT_SCORE_MIN), "timed_out": False, "details": result["error"]}
+
+    data = result["data"]
+    baseline_seconds = float(data["baseline_seconds"])
+    candidate_seconds = float(data["candidate_seconds"])
+    improvement_ratio = baseline_seconds / max(candidate_seconds, 1e-9)
+    runtime_score = component_score(clamp((improvement_ratio - 1.0) / 1.5))
+    return {
+        "runtime_score": runtime_score,
+        "timed_out": False,
+        "details": {
+            "baseline_seconds": round(baseline_seconds, 6),
+            "candidate_seconds": round(candidate_seconds, 6),
+            "improvement_ratio": round(improvement_ratio, 3),
+        },
+    }
+
+
+def summarize_results(prefix: str, results: List[Dict[str, Any]]) -> str:
+    """Render concise test output."""
+
+    if not results:
+        return f"{prefix}: no tests were executed."
+
+    lines = [prefix]
+    for item in results:
+        marker = "PASS" if item["passed"] else "FAIL"
+        lines.append(f"- {marker} {item['label']}: expected {item['expected']}, got {item['actual']}")
+    return "\n".join(lines)
+
+
+def base_grade(
+    *,
+    score: float,
+    syntax_score: float,
+    tests_passed: int,
+    tests_total: int,
+    quality_score: float,
+    runtime_score: float,
+    timed_out: bool,
+    details: Dict[str, Any],
+) -> TaskGrade:
+    """Create a normalized TaskGrade payload."""
+
+    safe_score = strict_score(score)
+    safe_syntax_score = component_score(syntax_score)
+    safe_quality_score = component_score(quality_score)
+    safe_runtime_score = component_score(runtime_score)
+
+    return TaskGrade(
+        score=safe_score,
+        syntax_score=safe_syntax_score,
+        tests_passed=tests_passed,
+        tests_total=tests_total,
+        quality_score=safe_quality_score,
+        runtime_score=safe_runtime_score,
+        timed_out=timed_out,
+        details=details,
+    )
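Note: shaped_score keeps every reward strictly inside (0, 1) by mapping clamped progress onto the [POOR_SCORE, NEAR_PERFECT_SCORE] band. Worked values:

shaped_score(0.0)  # 0.1   (floor)
shaped_score(0.5)  # 0.525 = 0.1 + (0.95 - 0.1) * 0.5
shaped_score(1.0)  # 0.95  (ceiling)
shaped_score(2.0)  # 0.95  (progress is clamped to [0, 1] first)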
build/lib/build/lib/graders/syntax.py
ADDED
@@ -0,0 +1,95 @@
+"""Syntax task grader."""
+
+from __future__ import annotations
+
+try:
+    from ..models import TaskGrade
+    from ..tasks.catalog import ReviewTask
+except ImportError:
+    from models import TaskGrade
+    from tasks.catalog import ReviewTask
+
+from .shared import (
+    base_grade,
+    compile_code,
+    component_score,
+    execute_cases,
+    quality_metrics,
+    shaped_score,
+    similarity_score,
+    summarize_results,
+)
+
+
+def grade_syntax_task(task: ReviewTask, code: str, timeout_s: float = 2.0) -> TaskGrade:
+    """Grade a syntax-fix task deterministically."""
+
+    compiled, compile_error = compile_code(code)
+    quality = quality_metrics(code, task.function_name)
+    details = {
+        "compile_error": compile_error,
+        "quality_notes": quality["quality_notes"],
+        "style_score": quality["style_score"],
+    }
+
+    if not compiled:
+        progress = 0.05 + 0.2 * similarity_score(code, task.reference_code)
+        details["test_results"] = []
+        details["test_summary"] = "Code does not compile yet."
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.01),
+            tests_passed=0,
+            tests_total=len(task.public_cases) + len(task.hidden_cases),
+            quality_score=component_score(0.01),
+            runtime_score=component_score(0.01),
+            timed_out=False,
+            details=details,
+        )
+
+    cases = task.public_cases + task.hidden_cases
+    result = execute_cases(code, task.function_name, cases, timeout_s=timeout_s)
+    if result.get("timed_out"):
+        details["test_results"] = []
+        details["test_summary"] = result["error"]
+        progress = 0.2 + 0.25 * quality["score"]
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.95),
+            tests_passed=0,
+            tests_total=len(cases),
+            quality_score=quality["score"],
+            runtime_score=component_score(0.01),
+            timed_out=True,
+            details=details,
+        )
+    if "error" in result:
+        details["test_results"] = []
+        details["test_summary"] = result["error"]
+        progress = 0.18 + 0.2 * quality["score"]
+        return base_grade(
+            score=shaped_score(progress),
+            syntax_score=component_score(0.95),
+            tests_passed=0,
+            tests_total=len(cases),
+            quality_score=quality["score"],
+            runtime_score=component_score(0.01),
+            timed_out=False,
+            details=details,
+        )
+
+    data = result["data"]
+    details["test_results"] = data["results"]
+    details["test_summary"] = summarize_results("Validation checks", data["results"])
+    pass_rate = data["passed"] / max(data["total"], 1)
+    progress = min(1.0, 0.15 + 0.75 * pass_rate + 0.1 * quality["score"])
+    return base_grade(
+        score=shaped_score(progress),
+        syntax_score=component_score(0.95),
+        tests_passed=data["passed"],
+        tests_total=data["total"],
+        quality_score=quality["score"],
+        runtime_score=component_score(0.01),
+        timed_out=False,
+        details=details,
+    )
build/lib/build/lib/models/__init__.py
ADDED
@@ -0,0 +1,66 @@
+"""PyTorch-backed model wrappers plus OpenEnv schema exports."""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+
+from .pytorch_model import PyTorchCodeAnalyzerModel
+
+
+def _load_schema_module():
+    schema_path = Path(__file__).resolve().parent.parent / "models.py"
+    spec = importlib.util.spec_from_file_location("_python_env_schema_models", schema_path)
+    if spec is None or spec.loader is None:  # pragma: no cover
+        raise ImportError(f"Unable to load schema models from {schema_path}")
+    if spec.name in sys.modules:
+        return sys.modules[spec.name]
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = module
+    spec.loader.exec_module(module)
+    for model_name in (
+        "HistoryEntry",
+        "RewardDetails",
+        "PythonCodeReviewAction",
+        "PythonCodeReviewObservation",
+        "PythonCodeReviewState",
+        "TaskDescriptor",
+        "TaskSummary",
+        "TaskGrade",
+        "HealthResponse",
+    ):
+        getattr(module, model_name).model_rebuild()
+    return module
+
+
+_schema_models = _load_schema_module()
+
+HealthResponse = _schema_models.HealthResponse
+HistoryEntry = _schema_models.HistoryEntry
+PythonAction = _schema_models.PythonAction
+PythonCodeReviewAction = _schema_models.PythonCodeReviewAction
+PythonCodeReviewObservation = _schema_models.PythonCodeReviewObservation
+PythonCodeReviewState = _schema_models.PythonCodeReviewState
+PythonObservation = _schema_models.PythonObservation
+PythonState = _schema_models.PythonState
+RewardDetails = _schema_models.RewardDetails
+TaskDescriptor = _schema_models.TaskDescriptor
+TaskGrade = _schema_models.TaskGrade
+TaskSummary = _schema_models.TaskSummary
+
+__all__ = [
+    "HealthResponse",
+    "HistoryEntry",
+    "PyTorchCodeAnalyzerModel",
+    "PythonAction",
+    "PythonCodeReviewAction",
+    "PythonCodeReviewObservation",
+    "PythonCodeReviewState",
+    "PythonObservation",
+    "PythonState",
+    "RewardDetails",
+    "TaskDescriptor",
+    "TaskGrade",
+    "TaskSummary",
+]
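The rebuild loop resolves the shared pydantic models' forward references before export, so downstream code can import the PyTorch wrapper and schema types from a single namespace. A hedged usage sketch (the `models` import path is an assumption; it depends on how the package is installed):

    # Hypothetical consumer; assumes the package root is on sys.path.
    from models import PyTorchCodeAnalyzerModel, TaskGrade

    analyzer = PyTorchCodeAnalyzerModel()
    print(sorted(TaskGrade.model_fields))  # fields resolved after model_rebuild()
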
build/lib/build/lib/models/pytorch_model.py
ADDED
@@ -0,0 +1,149 @@
+"""PyTorch + transformers model wrapper for multi-domain code scoring."""
+
+from __future__ import annotations
+
+import hashlib
+from typing import Dict, List, Sequence
+
+import torch
+import torch.nn.functional as F
+
+try:
+    from transformers import AutoModel, AutoTokenizer
+except Exception:
+    AutoModel = None  # type: ignore[assignment]
+    AutoTokenizer = None  # type: ignore[assignment]
+
+
+DOMAIN_PROTOTYPES: Dict[str, List[str]] = {
+    "dsa": [
+        "Binary search, hashmap optimization, recursion, dynamic programming, arrays, trees, graphs, stack, queue, complexity.",
+        "Competitive programming algorithm with loops, memoization, prefix sums, and asymptotic analysis.",
+    ],
+    "data_science": [
+        "Pandas dataframe transformation, numpy vectorization, feature leakage, train test split, iterrows misuse.",
+        "Data cleaning pipeline using pandas, numpy, aggregation, joins, and vectorized operations.",
+    ],
+    "ml_dl": [
+        "PyTorch model, training loop, optimizer, backward pass, eval mode, no_grad, loss function, dataloader.",
+        "Machine learning inference and training code with torch, sklearn, tensors, gradients, and model checkpoints.",
+    ],
+    "web": [
+        "FastAPI endpoint, request validation, Pydantic models, async routes, API security, backend service design.",
+        "REST API backend with routers, dependency injection, input validation, serialization, and error handling.",
+    ],
+    "general": [
+        "General Python utility code with readable structure, typing, tests, and maintainable abstractions.",
+    ],
+}
+
+QUALITY_ANCHORS: Dict[str, List[str]] = {
+    "high": [
+        "Readable typed Python code with validation, efficient algorithms, vectorized operations, safe inference, and clean API boundaries.",
+        "Production-ready code with small functions, docstrings, low complexity, and clear error handling.",
+    ],
+    "low": [
+        "Brute-force nested loops, missing validation, unsafe input handling, missing eval mode, missing no_grad, and code smells.",
+        "Hard to maintain code with high complexity, repeated scans, mutable side effects, and unclear structure.",
+    ],
+}
+
+
+class _HashEmbeddingBackend:
+    """Torch-native fallback when pretrained weights cannot be loaded."""
+
+    def __init__(self, dimensions: int = 128) -> None:
+        self.dimensions = dimensions
+        self.model_id = "hashed-token-fallback"
+        self.backend_name = "hashed-token-fallback"
+        self.notes = ["Using hashed embeddings because pretrained transformer weights are unavailable."]
+
+    def embed_texts(self, texts: Sequence[str]) -> torch.Tensor:
+        matrix = torch.zeros((len(texts), self.dimensions), dtype=torch.float32)
+        for row_index, text in enumerate(texts):
+            tokens = text.lower().split()[:512]
+            if not tokens:
+                matrix[row_index, 0] = 1.0
+                continue
+            for token in tokens:
+                digest = hashlib.md5(token.encode("utf-8")).hexdigest()
+                bucket = int(digest[:8], 16) % self.dimensions
+                sign = -1.0 if int(digest[8:10], 16) % 2 else 1.0
+                matrix[row_index, bucket] += sign
+        return F.normalize(matrix + 1e-6, dim=1)
+
+
+class PyTorchCodeAnalyzerModel:
+    """Score code using pretrained transformer embeddings plus prototype similarity."""
+
+    def __init__(self, model_id: str = "huggingface/CodeBERTa-small-v1") -> None:
+        self.model_id = model_id
+        self.backend_name = model_id
+        self.notes: List[str] = []
+        self._tokenizer = None
+        self._model = None
+        self._fallback = _HashEmbeddingBackend()
+        self._prototype_cache: Dict[str, torch.Tensor] = {}
+
+    def _ensure_loaded(self) -> None:
+        if self._model is not None or self.notes:
+            return
+        if AutoTokenizer is None or AutoModel is None:
+            self.backend_name = self._fallback.backend_name
+            self.notes = list(self._fallback.notes)
+            return
+        try:
+            self._tokenizer = AutoTokenizer.from_pretrained(self.model_id)
+            self._model = AutoModel.from_pretrained(self.model_id)
+            self._model.eval()
+            self.notes.append(f"Loaded pretrained encoder `{self.model_id}`.")
+        except Exception as exc:
+            self.backend_name = self._fallback.backend_name
+            self.notes = list(self._fallback.notes) + [f"Pretrained load failed: {type(exc).__name__}: {exc}"]
+
+    def _embed_texts(self, texts: Sequence[str]) -> torch.Tensor:
+        self._ensure_loaded()
+        if self._model is None or self._tokenizer is None:
+            return self._fallback.embed_texts(texts)
+        encoded = self._tokenizer(list(texts), padding=True, truncation=True, max_length=256, return_tensors="pt")
+        with torch.no_grad():
+            outputs = self._model(**encoded)
+        hidden = outputs.last_hidden_state
+        mask = encoded["attention_mask"].unsqueeze(-1)
+        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
+        return F.normalize(pooled, dim=1)
+
+    def _prototype_matrix(self, bucket: str, texts: Sequence[str]) -> torch.Tensor:
+        if bucket not in self._prototype_cache:
+            self._prototype_cache[bucket] = self._embed_texts(texts)
+        return self._prototype_cache[bucket]
+
+    def predict(self, code: str, context_window: str, static_summary: Dict[str, object]) -> Dict[str, object]:
+        """Predict domain probabilities and a model quality score."""
+
+        document = (
+            f"Code:\n{code.strip()[:4000]}\n\n"
+            f"Context:\n{context_window.strip()[:1000]}\n\n"
+            f"Static hints:\n{static_summary}\n"
+        )
+        candidate = self._embed_texts([document])
+
+        domain_scores: Dict[str, float] = {}
+        for domain, texts in DOMAIN_PROTOTYPES.items():
+            matrix = self._prototype_matrix(f"domain:{domain}", texts)
+            similarity = torch.matmul(candidate, matrix.T).max().item()
+            domain_scores[domain] = round((similarity + 1.0) / 2.0, 4)
+
+        high_matrix = self._prototype_matrix("quality:high", QUALITY_ANCHORS["high"])
+        low_matrix = self._prototype_matrix("quality:low", QUALITY_ANCHORS["low"])
+        high_similarity = torch.matmul(candidate, high_matrix.T).max().item()
+        low_similarity = torch.matmul(candidate, low_matrix.T).max().item()
+        ml_quality_score = torch.sigmoid(torch.tensor((high_similarity - low_similarity) * 4.0)).item()
+
+        return {
+            "domain_scores": domain_scores,
+            "ml_quality_score": round(float(ml_quality_score), 4),
+            "backend_name": self.backend_name,
+            "model_id": self.model_id,
+            "notes": list(self.notes),
+        }
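End to end, the wrapper embeds the code plus context, compares it against the domain and quality prototypes, and degrades gracefully to hashed embeddings when transformers weights cannot load. A minimal usage sketch (the snippet and static summary values are illustrative):

    model = PyTorchCodeAnalyzerModel()
    result = model.predict(
        code="def add(a: int, b: int) -> int:\n    return a + b\n",
        context_window="small utility module",
        static_summary={"cyclomatic_complexity": 1, "line_count": 2},
    )
    print(result["backend_name"])      # pretrained id, or the hashed fallback
    print(result["domain_scores"])     # per-domain similarity mapped into [0, 1]
    print(result["ml_quality_score"])  # sigmoid of the high-vs-low anchor margin
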
build/lib/build/lib/schemas/__init__.py
ADDED
@@ -0,0 +1,13 @@
+"""Public schemas for the multi-domain analysis platform."""
+
+from .request import AnalyzeCodeRequest
+from .response import AnalyzeCodeResponse, AnalysisIssue, DomainAnalysis, ScoreBreakdown, StaticAnalysisSummary
+
+__all__ = [
+    "AnalyzeCodeRequest",
+    "AnalyzeCodeResponse",
+    "AnalysisIssue",
+    "DomainAnalysis",
+    "ScoreBreakdown",
+    "StaticAnalysisSummary",
+]
build/lib/build/lib/schemas/request.py
ADDED
@@ -0,0 +1,19 @@
+"""Request schemas for code analysis endpoints and UI."""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+DomainHint = Literal["auto", "dsa", "data_science", "ml_dl", "web"]
+
+
+class AnalyzeCodeRequest(BaseModel):
+    """Validated input payload for multi-domain code analysis."""
+
+    code: str = Field(..., min_length=1, description="Source code to analyze.")
+    context_window: str = Field(default="", max_length=2000, description="Optional repository or task context.")
+    traceback_text: str = Field(default="", max_length=2000, description="Optional runtime or test failure output.")
+    domain_hint: DomainHint = Field(default="auto", description="Optional domain override when auto detection is not desired.")
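Because the payload is a pydantic model, malformed input is rejected before any analysis runs. A minimal sketch:

    from pydantic import ValidationError

    ok = AnalyzeCodeRequest(code="print('hi')", domain_hint="dsa")
    print(ok.model_dump()["domain_hint"])  # "dsa"

    try:
        AnalyzeCodeRequest(code="")  # violates min_length=1
    except ValidationError as exc:
        print(exc.error_count(), "validation error(s)")
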
build/lib/build/lib/schemas/response.py
ADDED
@@ -0,0 +1,73 @@
+"""Response schemas for the multi-domain analysis platform."""
+
+from __future__ import annotations
+
+from typing import Dict, List, Literal
+
+from pydantic import BaseModel, Field
+
+
+DomainType = Literal["dsa", "data_science", "ml_dl", "web", "general"]
+Severity = Literal["low", "medium", "high"]
+
+
+class AnalysisIssue(BaseModel):
+    """One detected issue or risk in the code snippet."""
+
+    title: str
+    severity: Severity
+    description: str
+    line_hint: int | None = None
+
+
+class StaticAnalysisSummary(BaseModel):
+    """Language-agnostic static-analysis signals."""
+
+    syntax_valid: bool
+    syntax_error: str = ""
+    cyclomatic_complexity: int = Field(..., ge=1)
+    line_count: int = Field(..., ge=0)
+    max_loop_depth: int = Field(..., ge=0)
+    time_complexity: str = "Unknown"
+    space_complexity: str = "Unknown"
+    detected_imports: List[str] = Field(default_factory=list)
+    code_smells: List[str] = Field(default_factory=list)
+
+
+class DomainAnalysis(BaseModel):
+    """Domain-specific analysis payload returned by an analyzer."""
+
+    domain: DomainType
+    domain_score: float = Field(..., ge=0.0, le=1.0)
+    issues: List[AnalysisIssue] = Field(default_factory=list)
+    suggestions: List[str] = Field(default_factory=list)
+    highlights: Dict[str, float | str] = Field(default_factory=dict)
+
+
+class ScoreBreakdown(BaseModel):
+    """Reward inputs and final normalized score."""
+
+    ml_score: float = Field(..., ge=0.0, le=1.0)
+    domain_score: float = Field(..., ge=0.0, le=1.0)
+    lint_score: float = Field(..., ge=0.0, le=1.0)
+    complexity_penalty: float = Field(..., ge=0.0, le=1.0)
+    quality_signal: float = Field(..., ge=0.0, le=1.0)
+    error_reduction_signal: float = Field(..., ge=0.0, le=1.0)
+    completion_signal: float = Field(..., ge=0.0, le=1.0)
+    reward: float = Field(..., ge=0.0, le=1.0)
+
+
+class AnalyzeCodeResponse(BaseModel):
+    """Top-level structured output for API and UI consumers."""
+
+    detected_domain: DomainType
+    domain_confidences: Dict[str, float]
+    score_breakdown: ScoreBreakdown
+    static_analysis: StaticAnalysisSummary
+    domain_analysis: DomainAnalysis
+    improvement_plan: List[str] = Field(default_factory=list)
+    model_backend: str
+    model_id: str
+    summary: str
+    context_window: str = ""
+    analysis_time_ms: float = Field(..., ge=0.0)
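Every score field carries `ge`/`le` bounds, so an out-of-range reward fails at construction instead of leaking into API or UI consumers. A minimal sketch:

    from pydantic import ValidationError

    try:
        ScoreBreakdown(
            ml_score=0.5, domain_score=0.5, lint_score=0.5,
            complexity_penalty=0.1, quality_signal=0.5,
            error_reduction_signal=0.5, completion_signal=0.5,
            reward=1.2,  # > 1.0, rejected by the le=1.0 constraint
        )
    except ValidationError:
        print("reward must stay within [0.0, 1.0]")
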
build/lib/build/lib/server/__init__.py
ADDED
@@ -0,0 +1,6 @@
+"""Server exports for python_code_review_env."""
+
+from .app import app
+from .env import PythonCodeReviewEnvironment
+
+__all__ = ["app", "PythonCodeReviewEnvironment"]
build/lib/build/lib/server/app.py
ADDED
@@ -0,0 +1,81 @@
+"""OpenEnv FastAPI entrypoint with optional Gradio mounting."""
+
+from __future__ import annotations
+
+import os
+
+from fastapi import FastAPI
+
+try:
+    from openenv.core.env_server.http_server import create_app
+except Exception as exc:  # pragma: no cover
+    raise ImportError(
+        "openenv-core is required to run the API server. Install project dependencies first."
+    ) from exc
+
+try:
+    import gradio as gr
+except Exception:
+    gr = None  # type: ignore[assignment]
+
+try:
+    from ..models import PythonCodeReviewAction, PythonCodeReviewObservation
+    from .env import PythonCodeReviewEnvironment
+except ImportError:
+    from models import PythonCodeReviewAction, PythonCodeReviewObservation
+    from server.env import PythonCodeReviewEnvironment
+
+
+def _gradio_enabled() -> bool:
+    for env_name in ("ENABLE_GRADIO_DEMO", "ENABLE_WEB_INTERFACE"):
+        if str(os.getenv(env_name, "")).strip().lower() in {"1", "true", "yes", "on"}:
+            return True
+    return False
+
+
+def _max_concurrent_envs() -> int:
+    try:
+        return max(int(os.getenv("OPENENV_MAX_CONCURRENT_ENVS", "2")), 1)
+    except Exception:
+        return 2
+
+
+def build_application():
+    """Compose the OpenEnv API with the Gradio demo frontend."""
+
+    api_app = create_app(
+        PythonCodeReviewEnvironment,
+        PythonCodeReviewAction,
+        PythonCodeReviewObservation,
+        env_name="python_code_review_env",
+        max_concurrent_envs=_max_concurrent_envs(),
+    )
+    served_app = api_app
+    if gr is not None and _gradio_enabled():
+        try:
+            from .demo import build_demo
+        except ImportError:
+            from server.demo import build_demo
+        served_app = gr.mount_gradio_app(api_app, build_demo(), path="/")
+
+    wrapper_app = FastAPI(title="python_code_review_env", version="1.0.0")
+
+    @wrapper_app.get("/health", include_in_schema=False)
+    def _health() -> dict[str, str]:
+        return {"status": "ok"}
+
+    wrapper_app.mount("/", served_app)
+    return wrapper_app
+
+
+app = build_application()
+
+
+def main(host: str = "0.0.0.0", port: int = 8000) -> None:
+    import uvicorn
+
+    uvicorn.run(app, host=host, port=port)
+
+
+if __name__ == "__main__":
+    main()
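The Gradio flag and concurrency limit are read from the environment when the application is built, so set them before constructing the app. A hedged launch sketch (values illustrative; assumes `server.app` is importable):

    import os

    os.environ["ENABLE_GRADIO_DEMO"] = "1"          # mount the demo UI at "/"
    os.environ["OPENENV_MAX_CONCURRENT_ENVS"] = "4"  # raise the env pool limit

    import uvicorn
    from server.app import build_application

    uvicorn.run(build_application(), host="0.0.0.0", port=8000)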