juand-r commited on
Commit
0d2fd72
·
verified ·
1 Parent(s): 860795f

Upload folder using huggingface_hub

Browse files
Files changed (45) hide show
  1. docs/managing_presets.md +194 -0
  2. docs/plans/2026-03-07-research-dashboard-design.md +86 -0
  3. frontend/index.html +12 -0
  4. frontend/package-lock.json +0 -0
  5. frontend/package.json +31 -0
  6. frontend/postcss.config.js +6 -0
  7. frontend/src/App.tsx +67 -0
  8. frontend/src/ThemeToggle.tsx +35 -0
  9. frontend/src/config.ts +10 -0
  10. frontend/src/experiments/ExperimentsApp.tsx +71 -0
  11. frontend/src/experiments/api.ts +100 -0
  12. frontend/src/experiments/components/ArtifactsTab.tsx +343 -0
  13. frontend/src/experiments/components/ExperimentDetail.tsx +464 -0
  14. frontend/src/experiments/components/ExperimentList.tsx +154 -0
  15. frontend/src/experiments/components/Markdown.tsx +65 -0
  16. frontend/src/experiments/components/NoteView.tsx +43 -0
  17. frontend/src/experiments/components/SubExperimentView.tsx +154 -0
  18. frontend/src/experiments/components/SummaryFindingsView.tsx +38 -0
  19. frontend/src/experiments/components/TimelineTab.tsx +228 -0
  20. frontend/src/experiments/components/viewers/HeatmapViewer.tsx +2158 -0
  21. frontend/src/experiments/components/viewers/ImageViewer.tsx +319 -0
  22. frontend/src/experiments/components/viewers/PlotlyViewer.tsx +422 -0
  23. frontend/src/experiments/components/viewers/TableViewer.tsx +440 -0
  24. frontend/src/experiments/components/viewers/YamlViewer.tsx +588 -0
  25. frontend/src/experiments/store.ts +205 -0
  26. frontend/src/experiments/types.ts +201 -0
  27. frontend/src/hashRouter.ts +161 -0
  28. frontend/src/index.css +145 -0
  29. frontend/src/main.tsx +10 -0
  30. frontend/src/model/ModelApp.tsx +228 -0
  31. frontend/src/model/api.ts +63 -0
  32. frontend/src/model/components/InfoBar.tsx +91 -0
  33. frontend/src/model/components/QuestionNav.tsx +112 -0
  34. frontend/src/model/components/Sidebar.tsx +378 -0
  35. frontend/src/model/components/TracePanel.tsx +171 -0
  36. frontend/src/model/store.ts +270 -0
  37. frontend/src/model/types.ts +55 -0
  38. frontend/src/model/utils/promptParser.ts +93 -0
  39. frontend/src/model/utils/traceHighlight.ts +37 -0
  40. frontend/src/visualizer/VisualizerApp.tsx +55 -0
  41. frontend/src/vite-env.d.ts +1 -0
  42. frontend/tailwind.config.js +9 -0
  43. frontend/tsconfig.app.json +21 -0
  44. frontend/tsconfig.json +4 -0
  45. frontend/vite.config.ts +18 -0
docs/managing_presets.md ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Managing RACA-VIS-PRESETS Programmatically
2
+
3
+ ## Overview
4
+
5
+ The agg_visualizer stores presets in the HuggingFace dataset repo `your-org/RACA-VIS-PRESETS`. Each visualizer type has its own JSON file:
6
+
7
+ | Type | File | Extra Fields |
8
+ |------|------|-------------|
9
+ | `model` | `model_presets.json` | `column` (default: `"model_responses"`) |
10
+ | `arena` | `arena_presets.json` | none |
11
+ | `rlm` | `rlm_presets.json` | `config` (default: `"rlm_call_traces"`) |
12
+ | `harbor` | `harbor_presets.json` | none |
13
+
14
+ ## Preset Schema
15
+
16
+ Every preset has these base fields:
17
+
18
+ ```json
19
+ {
20
+ "id": "8-char hex",
21
+ "name": "Human-readable name",
22
+ "repo": "org/dataset-name",
23
+ "split": "train"
24
+ }
25
+ ```
26
+
27
+ Plus type-specific fields listed above.
28
+
29
+ ## How to Add Presets from Experiment Markdown Files
30
+
31
+ ### Step 1: Identify repos and their visualizer type
32
+
33
+ Read the experiment markdown file(s) and extract all HuggingFace repo links. Categorize each:
34
+
35
+ - **Countdown / MuSR datasets** (model response traces) → `model` type, set `column: "response"`
36
+ - **FrozenLake / arena datasets** (game episodes) → `arena` type
37
+ - **Harbor / SWE-bench datasets** → `harbor` type
38
+ - **RLM call traces** → `rlm` type, set `config: "rlm_call_traces"`
39
+
40
+ ### Step 2: Download existing presets from HF
41
+
42
+ ```python
43
+ from huggingface_hub import hf_hub_download
44
+ import json
45
+
46
+ PRESETS_REPO = "your-org/RACA-VIS-PRESETS"
47
+
48
def load_hf_presets(vis_type):
    """Download ``{vis_type}_presets.json`` from PRESETS_REPO and return its parsed JSON.

    Args:
        vis_type: Visualizer type used to build the file name
            (e.g. "model", "arena", "rlm", "harbor").

    Returns:
        The decoded JSON content (expected to be a list of preset dicts), or
        ``[]`` when the presets file does not exist in the repo yet.

    Raises:
        Any other download or parsing error is propagated on purpose: treating a
        transient failure as "no presets" would make the later merge-and-upload
        step overwrite the remote file with only the new presets.
    """
    from huggingface_hub.utils import EntryNotFoundError

    try:
        path = hf_hub_download(PRESETS_REPO, f"{vis_type}_presets.json", repo_type="dataset")
    except EntryNotFoundError:
        # The repo exists but this presets file has not been created yet.
        return []
    with open(path) as f:
        return json.load(f)
55
+
56
+ existing_model = load_hf_presets("model")
57
+ existing_arena = load_hf_presets("arena")
58
+ # ... etc for rlm, harbor
59
+
60
+ # Build set of repos already present
61
+ existing_repos = set()
62
+ for presets_list in [existing_model, existing_arena]:
63
+ for p in presets_list:
64
+ existing_repos.add(p["repo"])
65
+ ```
66
+
67
+ ### Step 3: Build new presets, skipping duplicates
68
+
69
+ ```python
70
+ import uuid
71
+
72
+ new_presets = [] # list of (vis_type, name, repo)
73
+
74
+ # Example: adding strategy compliance countdown presets
75
+ new_presets.append(("model", "SC Countdown K2-Inst TreeSearch",
76
+ "your-org/t1-strategy-countdown-treesearch-kimi-k2-instruct-kimi-inst"))
77
+
78
+ # ... add all repos from the markdown ...
79
+
80
+ # Filter out existing
81
+ to_add = {"model": [], "arena": [], "rlm": [], "harbor": []}
82
+ for vis_type, name, repo in new_presets:
83
+ if repo in existing_repos:
84
+ continue # skip duplicates
85
+ preset = {
86
+ "id": uuid.uuid4().hex[:8],
87
+ "name": name,
88
+ "repo": repo,
89
+ "split": "train",
90
+ }
91
+ if vis_type == "model":
92
+ preset["column"] = "response"
93
+ elif vis_type == "rlm":
94
+ preset["config"] = "rlm_call_traces"
95
+ to_add[vis_type].append(preset)
96
+ ```
97
+
98
+ ### Step 4: Merge and upload to HF
99
+
100
+ ```python
101
import os
import tempfile
from huggingface_hub import HfApi

api = HfApi()

# Merge new presets with existing
final_model = existing_model + to_add["model"]
final_arena = existing_arena + to_add["arena"]

for vis_type, presets in [("model", final_model), ("arena", final_arena)]:
    if not presets:
        continue
    # delete=False: the file must be closed (and therefore flushed) before
    # upload_file reads it by path, so it is removed manually afterwards.
    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(presets, f, indent=2)
        tmp = f.name
    try:
        api.upload_file(
            path_or_fileobj=tmp,
            path_in_repo=f"{vis_type}_presets.json",
            repo_id=PRESETS_REPO,
            repo_type="dataset",
        )
    finally:
        # Remove the temp file even when the upload fails.
        os.unlink(tmp)
123
+ ```
124
+
125
+ ### Step 5: Sync the deployed HF Space
126
+
127
+ After uploading to the HF dataset, tell the running Space to re-download presets:
128
+
129
+ ```bash
130
+ curl -X POST "https://your-org-agg-trace-visualizer.hf.space/api/presets/sync"
131
+ ```
132
+
133
+ This forces the Space to re-download all preset files from `RACA-VIS-PRESETS` without needing a restart or redeployment.
134
+
135
+ ### Step 6: Sync local preset files
136
+
137
+ ```python
138
import shutil
from pathlib import Path
from huggingface_hub import hf_hub_download

# PRESETS_REPO is the repo id defined in Step 2.
local_dir = Path(__file__).parent.parent / "backend" / "presets"
for vis_type in ["model", "arena", "rlm", "harbor"]:
    filename = f"{vis_type}_presets.json"
    try:
        path = hf_hub_download(PRESETS_REPO, filename, repo_type="dataset")
        shutil.copy2(path, local_dir / filename)
    except Exception as exc:
        # Best-effort sync: keep going with the other types, but report what was skipped.
        print(f"Skipping {filename}: {exc}")
148
+ ```
149
+
150
+ ## Naming Convention
151
+
152
+ Preset names follow this pattern to be descriptive and avoid future conflicts:
153
+
154
+ ```
155
+ {Experiment} {Task} {Model} {Variant}
156
+ ```
157
+
158
+ ### Experiment prefixes
159
+ - `SC` — Strategy Compliance
160
+ - `Wing` — Wingdings Compliance
161
+
162
+ ### Model abbreviations
163
+ - `K2-Inst` — Kimi-K2-Instruct (RLHF)
164
+ - `K2-Think` — Kimi-K2-Thinking (RLVR)
165
+ - `Q3-Inst` — Qwen3-Next-80B Instruct (RLHF)
166
+ - `Q3-Think` — Qwen3-Next-80B Thinking (RLVR)
167
+
168
+ ### Task names
169
+ - `Countdown` — 8-arg arithmetic countdown
170
+ - `MuSR` — MuSR murder mysteries
171
+ - `FrozenLake` — FrozenLake grid navigation
172
+
173
+ ### Variant names (strategy compliance only)
174
+ - `TreeSearch` / `Baseline` / `Anti` — countdown tree search experiment
175
+ - `CritFirst` / `Anti-CritFirst` — criterion-first cross-cutting analysis
176
+ - `Counterfactual` / `Anti-Counterfactual` — counterfactual hypothesis testing
177
+ - `BackChain` — backward chaining (FrozenLake)
178
+
179
+ ### Examples
180
+
181
+ ```
182
+ SC Countdown K2-Inst TreeSearch # Strategy compliance, countdown, Kimi instruct, tree search variant
183
+ SC MuSR Q3-Think Counterfactual # Strategy compliance, MuSR, Qwen thinking, counterfactual variant
184
+ SC FrozenLake K2-Think BackChain # Strategy compliance, FrozenLake, Kimi thinking, backward chaining
185
+ Wing Countdown Q3-Inst # Wingdings, countdown, Qwen instruct (no variant — wingdings has one condition)
186
+ Wing MuSR K2-Think # Wingdings, MuSR, Kimi thinking
187
+ ```
188
+
189
+ ## Important Notes
190
+
191
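+ - **Always check for existing repos** before adding. The script above uses the `existing_repos` set to skip duplicates; note that it only collects repos from the model and arena presets, so extend it if you add rlm or harbor presets.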
192
+ - **The `column` field matters for model presets.** Strategy compliance and wingdings datasets use `"response"` as the response column, not the default `"model_responses"`.
193
+ - **Local files are a fallback cache.** The agg_visualizer downloads from HF on startup and caches locally. After uploading to HF, sync the local files so the running app picks up changes without restart (or hit the `/api/presets/sync` endpoint).
194
+ - **Don't modify rlm or harbor presets** unless adding datasets of those types. The script above only touches model and arena.
docs/plans/2026-03-07-research-dashboard-design.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Research Dashboard Design
2
+
3
+ **Date:** 2026-03-07
4
+ **Status:** Approved
5
+
6
+ ## Overview
7
+
8
+ Extend the existing agg_visualizer into a parent "Research Dashboard" website with a top-level navigation bar. The current visualizer becomes one page; a new Experiments page provides a control pane for tracking experiments, hypotheses, runs, and artifacts.
9
+
10
+ Deployed on HuggingFace Spaces (same Space as the current visualizer).
11
+
12
+ ## Audience
13
+
14
+ Primarily the researcher + advisor. May expand to a small team later.
15
+
16
+ ## Architecture
17
+
18
+ ### Navigation
19
+ - Top-level nav bar: `Experiments | Visualizer` (future: Research Map, Knowledge Base)
20
+ - State-driven view switching (useState), not URL routing (HF Spaces doesn't support deep-linking)
21
+ - Current visualizer tabs (Model Trace, Arena, RLM, etc.) nest inside the Visualizer page unchanged
22
+
23
+ ### Data Storage
24
+ - JSON files in HF dataset repo `your-org/RACA_DASHBOARD`
25
+ - Three files: `experiments.json`, `runs.json`, `sub_experiments.json`
26
+ - In-memory cache with async HF upload (same pattern as presets.py)
27
+ - Local JSON fallback in `backend/data/`
28
+
29
+ ### Backend API
30
+ Blueprint at `/api/experiments/`:
31
+
32
+ | Method | Path | Purpose |
33
+ |--------|------|---------|
34
+ | GET | `/` | List all experiments |
35
+ | POST | `/` | Create experiment |
36
+ | GET | `/:id` | Full detail (includes runs + subs) |
37
+ | PUT | `/:id` | Update experiment |
38
+ | DELETE | `/:id` | Delete experiment |
39
+ | POST | `/:id/runs` | Add run record |
40
+ | PUT | `/:id/runs/:run_id` | Update run |
41
+ | DELETE | `/:id/runs/:run_id` | Delete run |
42
+ | POST | `/:id/subs` | Add sub-experiment |
43
+ | PUT | `/:id/subs/:sub_id` | Update sub-experiment |
44
+ | DELETE | `/:id/subs/:sub_id` | Delete sub-experiment |
45
+ | POST | `/sync` | Force re-download from HF |
46
+ | POST | `/import` | Bulk import (experiment.yaml format) |
47
+
48
+ ### Data Model
49
+
50
+ **Experiment:**
51
+ - id, name, research_project, hypothesis (statement, type, status, success_criteria)
52
+ - stage, completeness (0-5), models[], tasks[], tags[]
53
+ - hf_repos[] (repo, description, date), wandb_url, notes (markdown)
54
+ - created, updated timestamps
55
+
56
+ **Run Record:**
57
+ - id, experiment_id, condition, model, cluster, status
58
+ - hf_dataset, metrics (dict), timestamp, notes
59
+
60
+ **Sub-experiment:**
61
+ - id, experiment_id, name, hypothesis, status
62
+ - content_md (full markdown report), hf_repos[]
63
+ - created, updated timestamps
64
+
65
+ ### Frontend
66
+
67
+ Three drill-down levels:
68
+
69
+ 1. **Experiment List** — Cards with name, hypothesis, status badge, completeness, tags, last updated. Sort/filter controls.
70
+ 2. **Experiment Detail** — Hypothesis header, tabbed views (Overview, Runs, Datasets, Sub-experiments). Inline editing.
71
+ 3. **Sub-experiment View** — Breadcrumb, header, markdown-rendered body, HF repos, edit toggle.
72
+
73
+ ### Integration Points
74
+ - exp-runner v2 pushes data via `/api/experiments/import`
75
+ - Flexible ingestion — API accepts data from any source
76
+ - No local filesystem dependency at runtime
77
+
78
+ ## Future Pages (Phase 2+)
79
+ - **Research Map** — Graph/board view of research directions and experiment relationships
80
+ - **Knowledge Base** — Searchable wiki of findings, notes, HF repos
81
+
82
+ ## Tech Stack
83
+ - Backend: Flask (existing)
84
+ - Frontend: React + Vite + Tailwind + Zustand (existing)
85
+ - Deployment: Docker on HuggingFace Spaces (existing)
86
+ - Storage: HF dataset repo as JSON store
frontend/index.html ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" class="dark">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>RACA Dashboard</title>
7
+ </head>
8
+ <body class="bg-gray-950 text-gray-100">
9
+ <div id="root"></div>
10
+ <script type="module" src="/src/main.tsx"></script>
11
+ </body>
12
+ </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "agg-visualizer",
3
+ "private": true,
4
+ "version": "0.1.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "tsc -b && vite build",
9
+ "preview": "vite preview"
10
+ },
11
+ "dependencies": {
12
+ "apache-arrow": "^21.1.0",
13
+ "hyparquet": "^1.25.5",
14
+ "react": "^18.3.1",
15
+ "react-dom": "^18.3.1",
16
+ "react-markdown": "^10.1.0",
17
+ "rehype-katex": "^7.0.1",
18
+ "remark-gfm": "^4.0.1",
19
+ "remark-math": "^6.0.0"
20
+ },
21
+ "devDependencies": {
22
+ "@types/react": "^18.3.12",
23
+ "@types/react-dom": "^18.3.1",
24
+ "@vitejs/plugin-react": "^4.3.4",
25
+ "autoprefixer": "^10.4.20",
26
+ "postcss": "^8.4.49",
27
+ "tailwindcss": "^3.4.15",
28
+ "typescript": "^5.6.3",
29
+ "vite": "^6.0.3"
30
+ }
31
+ }
frontend/postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ export default {
2
+ plugins: {
3
+ tailwindcss: {},
4
+ autoprefixer: {},
5
+ },
6
+ };
frontend/src/App.tsx ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { lazy, Suspense } from "react";
2
+ import { useHashRoute, navigateTo, useCopyLink } from "./hashRouter";
3
+ import ThemeToggle, { useTheme } from "./ThemeToggle";
4
+
5
+ const VisualizerApp = lazy(() => import("./visualizer/VisualizerApp"));
6
+ const ExperimentsApp = lazy(() => import("./experiments/ExperimentsApp"));
7
+
8
+ type PageId = "experiments" | "visualizer";
9
+
10
+ const PAGES: { id: PageId; label: string }[] = [
11
+ { id: "experiments", label: "Experiments" },
12
+ { id: "visualizer", label: "Visualizer" },
13
+ ];
14
+
15
/**
 * Root shell of the dashboard: a top bar with page tabs, the theme toggle and
 * a copy-link button, rendered above the currently active lazily loaded page.
 */
export default function App() {
  const route = useHashRoute();
  const { copyLink, copied } = useCopyLink();
  const { dark, toggle } = useTheme();

  // Every route other than "viz" shows the experiments page.
  let activePage: PageId = "experiments";
  if (route.page === "viz") {
    activePage = "visualizer";
  }

  /** Tailwind classes for a tab button; the active tab is highlighted. */
  const tabClass = (id: PageId): string => {
    const tone =
      activePage === id
        ? "border-cyan-500 text-cyan-400"
        : "border-transparent text-gray-500 hover:text-gray-300";
    return `px-4 py-2.5 text-sm font-medium border-b-2 transition-colors ${tone}`;
  };

  /** Navigate to the hash route that corresponds to a tab. */
  const openPage = (id: PageId) =>
    navigateTo({ page: id === "visualizer" ? "viz" : "experiments" });

  const loadingFallback = (
    <div className="flex items-center justify-center h-full text-gray-500">
      Loading...
    </div>
  );

  const tabs = PAGES.map((page) => (
    <button key={page.id} onClick={() => openPage(page.id)} className={tabClass(page.id)}>
      {page.label}
    </button>
  ));

  return (
    <div className="h-screen flex flex-col bg-gray-950 text-gray-100">
      {/* Top navigation bar */}
      <div className="flex items-center border-b border-gray-700 bg-gray-900 px-4 shrink-0">
        <span className="text-sm font-semibold text-gray-300 mr-6 py-2.5">
          Research Dashboard
        </span>
        {tabs}
        <div className="ml-auto flex items-center gap-3">
          <ThemeToggle dark={dark} toggle={toggle} />
          <button
            onClick={copyLink}
            className="px-2.5 py-1 text-xs font-medium rounded border border-gray-600 text-gray-400 hover:text-gray-200 hover:border-gray-400 transition-colors"
          >
            {copied ? "Copied!" : "Copy Link"}
          </button>
        </div>
      </div>

      {/* Active page */}
      <div className="flex-1 overflow-hidden">
        <Suspense fallback={loadingFallback}>
          {activePage === "experiments" && <ExperimentsApp />}
          {activePage === "visualizer" && <VisualizerApp />}
        </Suspense>
      </div>
    </div>
  );
}
frontend/src/ThemeToggle.tsx ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useState } from "react";
2
+
3
/**
 * Theme state hook.
 *
 * The initial value is read from the `raca-theme` localStorage key and
 * defaults to dark when nothing is stored, when there is no `window`, or when
 * storage cannot be read. Whenever the value changes, the `dark` / `light`
 * class on the document root is swapped and the choice is written back to
 * localStorage.
 *
 * @returns `dark` (current mode) and `toggle` (flips the mode).
 */
export function useTheme() {
  const [dark, setDark] = useState(() => {
    if (typeof window === "undefined") return true;
    try {
      const stored = localStorage.getItem("raca-theme");
      return stored ? stored === "dark" : true; // dark by default
    } catch {
      // Storage access can throw (e.g. blocked by browser privacy settings);
      // fall back to the default instead of crashing the first render.
      return true;
    }
  });

  useEffect(() => {
    const root = document.documentElement;
    if (dark) {
      root.classList.add("dark");
      root.classList.remove("light");
    } else {
      root.classList.add("light");
      root.classList.remove("dark");
    }
    try {
      localStorage.setItem("raca-theme", dark ? "dark" : "light");
    } catch {
      // Persisting the preference is best-effort; the class change above still applies.
    }
  }, [dark]);

  return { dark, toggle: () => setDark((d) => !d) };
}
24
+
25
/**
 * Small button that flips between dark and light mode.
 *
 * @param dark   Whether dark mode is currently active.
 * @param toggle Callback invoked when the button is clicked.
 */
export default function ThemeToggle({ dark, toggle }: { dark: boolean; toggle: () => void }) {
  // The tooltip and icon describe the mode the click switches TO.
  const hint = dark ? "Switch to light mode" : "Switch to dark mode";
  const icon = dark ? "☀️" : "🌙";

  return (
    <button
      title={hint}
      className="px-2 py-1 text-xs rounded border border-gray-600 dark:border-gray-600 text-gray-500 dark:text-gray-400 hover:text-gray-800 dark:hover:text-gray-200 transition-colors"
      onClick={toggle}
    >
      {icon}
    </button>
  );
}
frontend/src/config.ts ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Global configuration for the visualizer.
3
+ * Set HF_ORG via environment variable (VITE_HF_ORG) at build time,
4
+ * or override at runtime by setting window.__HF_ORG__.
5
+ */
6
+
7
+ export const HF_ORG: string =
8
+ (typeof window !== "undefined" && (window as any).__HF_ORG__) ||
9
+ import.meta.env.VITE_HF_ORG ||
10
+ "your-org";
frontend/src/experiments/ExperimentsApp.tsx ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useExperimentsState } from "./store";
2
+ import ExperimentList from "./components/ExperimentList";
3
+ import ExperimentDetail from "./components/ExperimentDetail";
4
+ import SubExperimentView from "./components/SubExperimentView";
5
+ import NoteView from "./components/NoteView";
6
+
7
/**
 * Top-level view switch for the Experiments page.
 *
 * Shows a loading or error placeholder while the experiment list is empty,
 * then picks the note, sub-experiment or detail view according to
 * `state.view.kind` (each only when its data is loaded). Anything else falls
 * back to the experiment list.
 */
export default function ExperimentsApp() {
  const state = useExperimentsState();
  const nothingLoaded = () => state.experiments.length === 0;

  if (state.loading && nothingLoaded()) {
    return (
      <div className="flex items-center justify-center h-full text-gray-500">
        Loading experiments...
      </div>
    );
  }

  if (state.error && nothingLoaded()) {
    return (
      <div className="flex flex-col items-center justify-center h-full text-gray-500">
        <p className="text-red-400 mb-2">{state.error}</p>
        <button
          onClick={state.loadExperiments}
          className="text-cyan-400 hover:text-cyan-300 text-sm"
        >
          Retry
        </button>
      </div>
    );
  }

  switch (state.view.kind) {
    case "note":
      if (state.currentNote && state.currentDetail) {
        return (
          <NoteView
            note={state.currentNote}
            experimentName={state.currentDetail.name}
            onBack={() => state.navigateToDetail(state.view.kind === "note" ? state.view.expId : "")}
          />
        );
      }
      break;

    case "sub":
      if (state.currentSub && state.currentDetail) {
        return (
          <SubExperimentView
            sub={state.currentSub}
            experimentName={state.currentDetail.name}
            onBack={() => state.navigateToDetail(state.view.kind === "sub" ? state.view.expId : "")}
            onRefresh={state.refreshDetail}
          />
        );
      }
      break;

    case "detail":
      if (state.currentDetail) {
        return (
          <ExperimentDetail
            experiment={state.currentDetail}
            onBack={state.navigateToList}
            onSelectNote={(noteId) =>
              state.navigateToNote(state.view.kind === "detail" ? state.view.expId : "", noteId)
            }
            onRefresh={state.refreshDetail}
          />
        );
      }
      break;
  }

  // Default: the list (also used when the selected view's data is not loaded yet).
  return (
    <ExperimentList
      experiments={state.experiments}
      onSelect={state.navigateToDetail}
      onRefresh={state.loadExperiments}
    />
  );
}
frontend/src/experiments/api.ts ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Experiment, ExperimentDetail, SubExperiment, ExperimentNote, ActivityLogEntry, Artifact } from "./types";
2
+
3
+ const BASE = "/api/experiments";
4
+
5
/**
 * Perform a fetch and decode the JSON response body.
 *
 * A `Content-Type: application/json` header is added unless the caller already
 * supplied a Content-Type. Caller-supplied headers are merged with it rather
 * than replacing it.
 *
 * @param url  Request URL.
 * @param opts Optional fetch options (method, body, headers, ...).
 * @returns The parsed JSON body, typed as `T` (not validated at runtime).
 * @throws Error when the response is not ok; the message is the `error` field
 *         of the JSON error body when present, otherwise the HTTP status text.
 */
async function fetchJSON<T>(url: string, opts?: RequestInit): Promise<T> {
  // Merge instead of letting `...opts` overwrite the whole headers object.
  const headers = new Headers(opts?.headers);
  if (!headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }

  const res = await fetch(url, { ...opts, headers });
  if (!res.ok) {
    // The error body may not be JSON at all; fall back to the status text.
    const err = await res.json().catch(() => ({ error: res.statusText }));
    throw new Error(err?.error || res.statusText);
  }
  return res.json();
}
16
+
17
/**
 * Build a URL under BASE from path segments, percent-encoding each one so an
 * ID containing characters such as `?`, `#` or spaces cannot alter the URL.
 */
function experimentPath(...segments: string[]): string {
  return [BASE, ...segments.map((s) => encodeURIComponent(s))].join("/");
}

/** Client for the experiments REST endpoints under BASE. Every call goes through fetchJSON. */
export const experimentsApi = {
  /** GET the experiment list. */
  list() {
    return fetchJSON<Experiment[]>(`${BASE}/`);
  },

  /** GET one experiment's detail record. */
  get(id: string) {
    return fetchJSON<ExperimentDetail>(experimentPath(id));
  },

  /** POST a new experiment. */
  create(data: Partial<Experiment>) {
    return fetchJSON<Experiment>(`${BASE}/`, {
      method: "POST",
      body: JSON.stringify(data),
    });
  },

  /** PUT changes to an existing experiment. */
  update(id: string, data: Partial<Experiment>) {
    return fetchJSON<Experiment>(experimentPath(id), {
      method: "PUT",
      body: JSON.stringify(data),
    });
  },

  /** DELETE an experiment. */
  delete(id: string) {
    return fetchJSON<{ status: string }>(experimentPath(id), { method: "DELETE" });
  },

  /** POST a new sub-experiment under an experiment. */
  createSub(expId: string, data: Partial<SubExperiment>) {
    return fetchJSON<SubExperiment>(experimentPath(expId, "subs"), {
      method: "POST",
      body: JSON.stringify(data),
    });
  },

  /** PUT changes to a sub-experiment. */
  updateSub(expId: string, subId: string, data: Partial<SubExperiment>) {
    return fetchJSON<SubExperiment>(experimentPath(expId, "subs", subId), {
      method: "PUT",
      body: JSON.stringify(data),
    });
  },

  /** DELETE a sub-experiment. */
  deleteSub(expId: string, subId: string) {
    return fetchJSON<{ status: string }>(experimentPath(expId, "subs", subId), { method: "DELETE" });
  },

  // Notes

  /** POST a new note under an experiment. */
  createNote(expId: string, data: Partial<ExperimentNote>) {
    return fetchJSON<ExperimentNote>(experimentPath(expId, "notes"), {
      method: "POST",
      body: JSON.stringify(data),
    });
  },

  /** PUT changes to a note. */
  updateNote(expId: string, noteId: string, data: Partial<ExperimentNote>) {
    return fetchJSON<ExperimentNote>(experimentPath(expId, "notes", noteId), {
      method: "PUT",
      body: JSON.stringify(data),
    });
  },

  /** DELETE a note. */
  deleteNote(expId: string, noteId: string) {
    return fetchJSON<{ status: string }>(experimentPath(expId, "notes", noteId), { method: "DELETE" });
  },

  /** POST to the sync endpoint. */
  sync() {
    return fetchJSON<{ status: string }>(`${BASE}/sync`, { method: "POST" });
  },

  /** GET the summary document (markdown content plus its update timestamp). */
  getSummary() {
    return fetchJSON<{ content_md: string; updated: string }>(`${BASE}/summary`);
  },

  /** GET an experiment's activity log; `scope` and `type` become query parameters when set. */
  getActivityLog(expId: string, filters?: { scope?: string; type?: string }) {
    const params = new URLSearchParams();
    if (filters?.scope) params.set("scope", filters.scope);
    if (filters?.type) params.set("type", filters.type);
    const qs = params.toString();
    return fetchJSON<ActivityLogEntry[]>(`${experimentPath(expId, "activity-log")}${qs ? `?${qs}` : ""}`);
  },

  /** GET the artifacts attached to an experiment. */
  getArtifacts(expId: string) {
    return fetchJSON<Artifact[]>(experimentPath(expId, "artifacts"));
  },
};
frontend/src/experiments/components/ArtifactsTab.tsx ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback } from "react";
2
+ import type { Artifact, ArtifactType, HfRepo, RunRecord } from "../types";
3
+ import { HF_ORG } from "../../config";
4
+ import TableViewer from "./viewers/TableViewer";
5
+ import PlotlyViewer from "./viewers/PlotlyViewer";
6
+ import ImageViewer from "./viewers/ImageViewer";
7
+ import YamlViewer from "./viewers/YamlViewer";
8
+ import HeatmapViewer from "./viewers/HeatmapViewer";
9
+
10
+ const ARTIFACT_TYPE_COLORS: Record<ArtifactType, string> = {
11
+ input_data: "bg-cyan-900/50 text-cyan-300 border border-cyan-700/50",
12
+ inference_output: "bg-blue-900/50 text-blue-300 border border-blue-700/50",
13
+ training_config: "bg-amber-900/50 text-amber-300 border border-amber-700/50",
14
+ canary_output: "bg-yellow-900/50 text-yellow-300 border border-yellow-700/50",
15
+ eval_result: "bg-emerald-900/50 text-emerald-300 border border-emerald-700/50",
16
+ processed_data: "bg-purple-900/50 text-purple-300 border border-purple-700/50",
17
+ };
18
+
19
+ const ARTIFACT_TYPE_LABELS: Record<ArtifactType, string> = {
20
+ input_data: "input data",
21
+ inference_output: "inference output",
22
+ training_config: "training config",
23
+ canary_output: "canary output",
24
+ eval_result: "eval result",
25
+ processed_data: "processed data",
26
+ };
27
+
28
+ interface ArtifactsTabProps {
29
+ artifacts: Artifact[];
30
+ hfRepos: HfRepo[];
31
+ runs: RunRecord[];
32
+ onOpenArtifact: (artifact: Artifact) => void;
33
+ }
34
+
35
/** Inline SVG glyph used for links that open an external page. */
function ExternalLinkIcon() {
  const outline =
    "M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14";

  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 24 24"
      className="h-3.5 w-3.5"
      fill="none"
      stroke="currentColor"
      strokeWidth={2}
    >
      <path d={outline} strokeLinecap="round" strokeLinejoin="round" />
    </svg>
  );
}
53
+
54
/**
 * Card for a single artifact: dataset name, artifact-type badge, optional
 * viewer type and description, creation date, a link to the dataset on
 * HuggingFace and an "Open" button.
 *
 * @param artifact Artifact to display.
 * @param onOpen   Called with the artifact when "Open" is clicked.
 */
function ArtifactCard({
  artifact,
  onOpen,
}: {
  artifact: Artifact;
  onOpen: (a: Artifact) => void;
}) {
  // Fall back to the neutral badge both when the type is missing and when the
  // data carries a type that is absent from the lookup tables.
  const typeColorClass =
    (artifact.artifact_type && ARTIFACT_TYPE_COLORS[artifact.artifact_type]) ||
    "bg-gray-800 text-gray-400 border border-gray-700";
  const typeLabel =
    (artifact.artifact_type && ARTIFACT_TYPE_LABELS[artifact.artifact_type]) || "unknown";

  // Names without an org prefix are resolved against the configured HF_ORG.
  const hasOrgPrefix = artifact.dataset_name.includes("/");
  const fullName = hasOrgPrefix ? artifact.dataset_name : `${HF_ORG}/${artifact.dataset_name}`;
  const hfUrl = `https://huggingface.co/datasets/${fullName}`;
  const shortName = artifact.dataset_name.split("/").pop() ?? artifact.dataset_name;

  // Hide the date when it is absent or unparsable rather than showing "Invalid Date".
  const created = artifact.created ? new Date(artifact.created) : null;
  const createdDate =
    created && !Number.isNaN(created.getTime()) ? created.toLocaleDateString() : null;

  return (
    <div className="bg-gray-900 border border-gray-800 rounded-lg p-3 flex flex-col gap-2 hover:border-gray-700 transition-colors">
      {/* Top row: name + badges */}
      <div className="flex items-start justify-between gap-2">
        <div className="min-w-0 flex-1">
          <span className="text-sm text-gray-200 font-medium truncate block" title={artifact.dataset_name}>
            {shortName}
          </span>
          {hasOrgPrefix && (
            <span className="text-xs text-gray-600 truncate block" title={artifact.dataset_name}>
              {artifact.dataset_name.split("/")[0]}/
            </span>
          )}
        </div>
        <div className="flex items-center gap-1.5 flex-shrink-0">
          <span className={`text-xs px-1.5 py-0.5 rounded font-medium ${typeColorClass}`}>
            {typeLabel}
          </span>
        </div>
      </div>

      {/* Visualizer type */}
      {artifact.visualizer_type && (
        <div>
          <span className="text-xs text-gray-500">
            viewer: <span className="text-gray-400">{artifact.visualizer_type.replace(/_/g, " ")}</span>
          </span>
        </div>
      )}

      {/* Description */}
      {artifact.description && (
        <p className="text-xs text-gray-400 leading-relaxed line-clamp-2">{artifact.description}</p>
      )}

      {/* Bottom row: date + actions */}
      <div className="flex items-center justify-between mt-1">
        <span className="text-xs text-gray-600">{createdDate ?? ""}</span>
        <div className="flex items-center gap-2">
          <a
            href={hfUrl}
            target="_blank"
            rel="noopener noreferrer"
            className="text-gray-500 hover:text-cyan-400 transition-colors"
            title="Open on HuggingFace"
          >
            <ExternalLinkIcon />
          </a>
          <button
            onClick={() => onOpen(artifact)}
            className="text-xs bg-cyan-700/40 hover:bg-cyan-600/50 text-cyan-300 px-2 py-0.5 rounded transition-colors border border-cyan-700/30"
          >
            Open
          </button>
        </div>
      </div>
    </div>
  );
}
136
+
137
+ interface RunGroupProps {
138
+ label: string;
139
+ artifacts: Artifact[];
140
+ defaultOpen?: boolean;
141
+ onOpenArtifact: (a: Artifact) => void;
142
+ }
143
+
144
/**
 * Collapsible section listing the artifact cards of one group.
 *
 * @param label          Heading shown next to the chevron.
 * @param artifacts      Artifacts rendered as cards while the section is open.
 * @param defaultOpen    Initial expanded state (defaults to open).
 * @param onOpenArtifact Forwarded to each card's "Open" action.
 */
function RunGroup({ label, artifacts, defaultOpen = true, onOpenArtifact }: RunGroupProps) {
  const [expanded, setExpanded] = useState(defaultOpen);

  const flip = () => setExpanded((current) => !current);
  // The chevron points down (rotated) while the section is expanded.
  const chevronClass = `text-xs transition-transform ${expanded ? "rotate-90" : ""} text-gray-500`;

  return (
    <div className="mb-4">
      <button onClick={flip} className="flex items-center gap-2 w-full text-left mb-2 group">
        <span className={chevronClass}>▶</span>
        <span className="text-sm font-medium text-gray-300 group-hover:text-gray-200 transition-colors">
          {label}
        </span>
        <span className="text-xs text-gray-600">({artifacts.length})</span>
      </button>
      {expanded && (
        <div className="grid gap-2 grid-cols-1 md:grid-cols-2 xl:grid-cols-3 pl-4">
          {artifacts.map((item) => (
            <ArtifactCard key={item.dataset_name} artifact={item} onOpen={onOpenArtifact} />
          ))}
        </div>
      )}
    </div>
  );
}
173
+
174
/**
 * Visualizer types that ArtifactsTab opens in its own overlay viewer;
 * any other type is forwarded to the parent's `onOpenArtifact` callback.
 */
const INLINE_VIEWER_TYPES = new Set<string>([
  "table",
  "yaml_config",
  "plotly",
  "image",
  "heatmap",
]);
175
+
176
/**
 * Artifacts tab of the experiment detail page.
 *
 * Renders one of three bodies:
 *  1. artifacts grouped by `run_id` (artifacts without one go under "Ungrouped"),
 *  2. a plain list of HuggingFace dataset links when only `hfRepos` is populated,
 *  3. an empty-state message.
 *
 * Artifacts whose visualizer type is in `INLINE_VIEWER_TYPES` open in an
 * overlay rendered by this component; all others are handed to `onOpenArtifact`.
 */
export default function ArtifactsTab({
  artifacts,
  hfRepos,
  runs,
  onOpenArtifact,
}: ArtifactsTabProps) {
  // Viewer currently shown over the list, or null when none is open.
  const [activeViewer, setActiveViewer] = useState<{
    type: string;
    repo: string;
  } | null>(null);

  // Browser "back" closes the overlay: opening a viewer pushes a history entry
  // tagged `artifactViewer`, so landing on an entry without that tag means the
  // user navigated away from it.
  useEffect(() => {
    function onPopState(event: PopStateEvent) {
      const landedOnViewerEntry = Boolean(event.state?.artifactViewer);
      if (activeViewer && !landedOnViewerEntry) {
        setActiveViewer(null);
      }
    }
    window.addEventListener("popstate", onPopState);
    return () => {
      window.removeEventListener("popstate", onPopState);
    };
  }, [activeViewer]);

  // run_id → human-readable label ("<condition> (<id prefix>)" or just an id prefix).
  const runLabelMap = new Map<string, string>();
  for (const run of runs) {
    const runLabel = run.condition
      ? `${run.condition} (${run.id.slice(0, 6)})`
      : run.id.slice(0, 8);
    runLabelMap.set(run.id, runLabel);
  }

  const handleOpenArtifact = useCallback(
    (artifact: Artifact) => {
      // Artifacts without an explicit visualizer are treated as tables.
      const viewerType = artifact.visualizer_type ?? "table";
      if (!INLINE_VIEWER_TYPES.has(viewerType)) {
        onOpenArtifact(artifact);
        return;
      }
      window.history.pushState({ artifactViewer: true }, "");
      setActiveViewer({ type: viewerType, repo: artifact.dataset_name });
    },
    [onOpenArtifact],
  );

  const closeViewer = useCallback(() => {
    setActiveViewer(null);
    // Pop the history entry that was pushed when the viewer opened.
    window.history.back();
  }, []);

  /** Case 1: manifest artifacts exist — show them grouped by run. */
  function renderGroupedArtifacts() {
    const byRun = new Map<string | null, Artifact[]>();
    for (const item of artifacts) {
      const runKey = item.run_id ?? null;
      const bucket = byRun.get(runKey);
      if (bucket) {
        bucket.push(item);
      } else {
        byRun.set(runKey, [item]);
      }
    }

    // Groups with a run id come first; the null-keyed bucket is rendered last as "Ungrouped".
    const runGroups: { key: string | null; label: string; artifacts: Artifact[] }[] = [];
    for (const [runKey, members] of byRun.entries()) {
      if (runKey === null) continue;
      runGroups.push({
        key: runKey,
        label: runLabelMap.get(runKey) ?? `Run ${runKey.slice(0, 8)}`,
        artifacts: members,
      });
    }
    // Order run groups by the `created` value of their first artifact, newest first.
    runGroups.sort((left, right) => {
      const leftCreated = left.artifacts[0]?.created ?? "";
      const rightCreated = right.artifacts[0]?.created ?? "";
      return rightCreated.localeCompare(leftCreated);
    });

    const ungrouped = byRun.get(null) ?? [];

    return (
      <div>
        <div className="flex items-center justify-between mb-4">
          <h2 className="text-sm font-medium text-gray-300">Artifacts</h2>
          <span className="text-xs text-gray-500">{artifacts.length} total</span>
        </div>

        {runGroups.map((group) => (
          <RunGroup
            key={group.key}
            label={group.label}
            artifacts={group.artifacts}
            onOpenArtifact={handleOpenArtifact}
          />
        ))}

        {ungrouped.length > 0 && (
          <RunGroup
            key="__ungrouped__"
            label="Ungrouped"
            artifacts={ungrouped}
            defaultOpen={runGroups.length === 0}
            onOpenArtifact={handleOpenArtifact}
          />
        )}
      </div>
    );
  }

  /** Case 2: no manifest artifacts, but legacy HuggingFace repos are listed. */
  function renderLegacyRepos() {
    return (
      <div>
        <div className="flex items-center justify-between mb-4">
          <h2 className="text-sm font-medium text-gray-300">HuggingFace Datasets</h2>
          <span className="text-xs text-gray-500 italic">legacy — no manifest entries</span>
        </div>
        <div className="grid gap-2">
          {hfRepos.map((entry, index) => (
            <div
              key={`${entry.repo}-${index}`}
              className="flex items-center justify-between bg-gray-900 rounded p-3 border border-gray-800"
            >
              <div>
                <a
                  href={`https://huggingface.co/datasets/${entry.repo}`}
                  target="_blank"
                  rel="noopener noreferrer"
                  className="text-cyan-400 hover:text-cyan-300 text-sm"
                >
                  {entry.repo}
                </a>
                {entry.description && (
                  <p className="text-xs text-gray-500 mt-0.5">{entry.description}</p>
                )}
              </div>
              <span className="text-xs text-gray-600">{entry.date || ""}</span>
            </div>
          ))}
        </div>
      </div>
    );
  }

  /** Case 3: nothing to show. */
  function renderEmptyState() {
    return (
      <div className="flex items-center justify-center h-32">
        <p className="text-sm text-gray-500 italic">No artifacts recorded yet.</p>
      </div>
    );
  }

  /** Picks the viewer component matching the active viewer's type. */
  function renderViewer(viewer: { type: string; repo: string }) {
    switch (viewer.type) {
      case "table":
        return <TableViewer datasetRepo={viewer.repo} onClose={closeViewer} />;
      case "plotly":
        return <PlotlyViewer datasetRepo={viewer.repo} onClose={closeViewer} />;
      case "image":
        return <ImageViewer datasetRepo={viewer.repo} onClose={closeViewer} />;
      case "yaml_config":
        return <YamlViewer datasetRepo={viewer.repo} onClose={closeViewer} />;
      case "heatmap":
        return <HeatmapViewer datasetRepo={viewer.repo} onClose={closeViewer} />;
      default:
        return null;
    }
  }

  let content;
  if (artifacts.length > 0) {
    content = renderGroupedArtifacts();
  } else if (hfRepos.length > 0) {
    content = renderLegacyRepos();
  } else {
    content = renderEmptyState();
  }

  return (
    <div className="relative">
      {content}

      {activeViewer && (
        <div className="absolute inset-0 z-10 bg-gray-900">
          {renderViewer(activeViewer)}
        </div>
      )}
    </div>
  );
}
frontend/src/experiments/components/ExperimentDetail.tsx ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useCallback } from "react";
2
+ import type { Artifact, ExperimentDetail as ExperimentDetailType, ExperimentNote } from "../types";
3
+ import { HF_ORG } from "../../config";
4
+ import { deriveDisplayStatus, statusBadgeColor } from "../types";
5
+ import { navigateTo, replaceRoute, parseHash } from "../../hashRouter";
6
+ import Markdown from "./Markdown";
7
+ import TimelineTab from "./TimelineTab";
8
+ import ArtifactsTab from "./ArtifactsTab";
9
+
10
/** Identifiers of the tabs the experiment detail page can display. */
type Tab =
  | "overview"
  | "artifacts"
  | "notes"
  | "live"
  | "timeline"
  | "red_team_brief";
11
+
12
/**
 * Text-colour class for each live job status shown in the jobs table.
 * Statuses missing from this table fall back to gray at the lookup site.
 */
const LIVE_JOB_STATUS_COLORS: Record<string, string> = {
  // Not started yet
  pending: "text-gray-400",
  // In progress
  running: "text-yellow-400",
  // Finished successfully
  completed: "text-green-400",
  // Both problem states share the same red
  failed: "text-red-400",
  blocked: "text-red-400",
};
19
+
20
/**
 * Background-colour class for the dot beside each event in the live-jobs
 * event timeline, keyed by the lower-cased event name.
 */
const TIMELINE_DOT_COLORS: Record<string, string> = {
  // Problem events
  blocked: "bg-red-500",
  failed: "bg-red-500",
  error: "bg-red-500",
  // Lifecycle events
  completed: "bg-green-500",
  started: "bg-yellow-500",
  submitted: "bg-blue-500",
};
28
+
29
/** Props accepted by the ExperimentDetail page component. */
interface Props {
  /** Experiment whose details are displayed. */
  experiment: ExperimentDetailType;
  /** Called when the "← Experiments" button is pressed. */
  onBack: () => void;
  /** Called with the note's id when a file in the "Files" tab is clicked. */
  onSelectNote: (noteId: string) => void;
  /** Refresh callback supplied by the parent. */
  onRefresh: () => void;
}
35
+
36
/**
 * Buckets notes by the directory part of their path.
 *
 * The path is `relative_path`, falling back to `filename`, then to "".
 * Paths without a "/" are filed under the pseudo-directory "(root)".
 *
 * @returns A map whose keys are ordered with `localeCompare`; notes keep
 *          their input order inside each bucket.
 */
function groupNotesByDir(notes: ExperimentNote[]): Map<string, ExperimentNote[]> {
  const byDir = new Map<string, ExperimentNote[]>();

  notes.forEach((note) => {
    const path = note.relative_path || note.filename || "";
    const lastSlash = path.lastIndexOf("/");
    const dir = lastSlash >= 0 ? path.slice(0, lastSlash) : "(root)";

    const bucket = byDir.get(dir);
    if (bucket) {
      bucket.push(note);
    } else {
      byDir.set(dir, [note]);
    }
  });

  // Rebuild the map so iteration follows sorted directory order.
  const orderedEntries = Array.from(byDir.entries());
  orderedEntries.sort((left, right) => left[0].localeCompare(right[0]));
  return new Map(orderedEntries);
}
48
+
49
/** Every tab id accepted from the URL's `tab` parameter. */
const VALID_TABS = new Set<Tab>([
  "overview",
  "artifacts",
  "notes",
  "live",
  "timeline",
  "red_team_brief",
]);
50
+
51
/**
 * Reads the `tab` parameter from the current hash route.
 *
 * @returns The requested tab when it is one of `VALID_TABS`, otherwise "overview".
 */
function getInitialTab(): Tab {
  const requested = parseHash().params.get("tab");
  const isKnownTab = Boolean(requested) && VALID_TABS.has(requested as Tab);
  return isKnownTab ? (requested as Tab) : "overview";
}
57
+
58
/**
 * Detail page for a single experiment.
 *
 * Shows a header (name, status badge, summary or hypothesis) followed by a tab
 * strip. The active tab is kept in component state and mirrored into the hash
 * route's `tab` parameter via `replaceRoute`, so a reload restores it.
 *
 * Tabs: Overview, Red Team Brief (only when a brief exists), Live Jobs (only
 * when there is a live status or at least one live job), Timeline, Artifacts
 * and Files.
 *
 * @param experiment   Experiment to display.
 * @param onBack       Called when the "← Experiments" button is pressed.
 * @param onSelectNote Called with a note id when a file in the Files tab is clicked.
 * @param onRefresh    Accepted for interface compatibility; not used by this component.
 */
export default function ExperimentDetail({ experiment, onBack, onSelectNote, onRefresh }: Props) {
  const [tab, _setTab] = useState<Tab>(getInitialTab);

  // Switch tab and mirror the choice into the URL. "overview" is the default,
  // so it is represented by the absence of the `tab` parameter.
  const setTab = useCallback((t: Tab) => {
    _setTab(t);
    const route = parseHash();
    const params = new URLSearchParams(route.params);
    if (t === "overview") {
      params.delete("tab");
    } else {
      params.set("tab", t);
    }
    replaceRoute({ params });
  }, []);

  const liveJobCount = Object.keys(experiment.live_jobs || {}).length;
  // An experiment counts as finished once findings have been written.
  const isFinished = !!experiment.zayne_findings;

  const TABS: { id: Tab; label: string; count?: number }[] = [
    { id: "overview", label: "Overview" },
    ...(experiment.red_team_brief
      ? [{ id: "red_team_brief" as Tab, label: "Red Team Brief" }]
      : []),
    ...(experiment.live_status || liveJobCount > 0
      ? [{ id: "live" as Tab, label: "Live Jobs", count: liveJobCount }]
      : []),
    { id: "timeline", label: "Timeline", count: experiment.activity_log?.length || 0 },
    { id: "artifacts", label: "Artifacts", count: experiment.artifacts?.length || experiment.hf_repos?.length || 0 },
    { id: "notes", label: "Files", count: experiment.experiment_notes?.length || 0 },
  ];

  return (
    <div className={tab === "overview" ? "h-full overflow-y-auto" : "h-full flex flex-col"}>
      {/* Header */}
      <div className="px-6 py-4 border-b border-gray-800">
        <div className="flex items-center gap-2 mb-3">
          <button
            onClick={onBack}
            className="text-gray-400 hover:text-gray-200 text-sm transition-colors"
          >
            &larr; Experiments
          </button>
        </div>
        <div className="flex items-start justify-between">
          <div>
            <div className="flex items-center gap-2">
              <h1 className="text-lg font-semibold text-gray-200">{experiment.name}</h1>
              {isFinished && (
                <span className="text-xs px-2 py-0.5 rounded-full font-medium bg-emerald-900 text-emerald-300 border border-emerald-700">
                  Finished
                </span>
              )}
              {experiment.live_status && !isFinished && (
                <span className={`text-xs px-2 py-0.5 rounded-full font-medium ${statusBadgeColor(deriveDisplayStatus(experiment))}`}>
                  {deriveDisplayStatus(experiment).replace("_", " ")}
                </span>
              )}
            </div>
            {experiment.live_message && (
              <p className="text-xs text-cyan-400/80 mt-0.5">{experiment.live_message}</p>
            )}
            {/* Researcher's summary takes priority; fall back to the hypothesis statement */}
            {experiment.zayne_summary ? (
              <div className="mt-1 max-w-2xl">
                <span className="text-[10px] font-bold uppercase tracking-wider text-amber-400/80">Researcher's Summary</span>
                <div className="text-sm text-gray-300 mt-0.5">
                  <Markdown content={experiment.zayne_summary} />
                </div>
              </div>
            ) : experiment.hypothesis?.statement ? (
              <p className="text-sm text-gray-400 mt-1 max-w-2xl italic">
                {experiment.hypothesis.statement}
              </p>
            ) : null}
          </div>
        </div>

        {/* Detail tabs */}
        <div className="flex gap-1 mt-4">
          {TABS.map((t) => (
            <button
              key={t.id}
              onClick={() => setTab(t.id)}
              className={`px-3 py-1.5 text-sm rounded-t transition-colors ${
                tab === t.id
                  ? "bg-gray-800 text-gray-200 border border-gray-700 border-b-gray-800"
                  : "text-gray-500 hover:text-gray-300"
              }`}
            >
              {t.label}
              {t.count !== undefined && (
                <span className="ml-1 text-xs text-gray-500">({t.count})</span>
              )}
            </button>
          ))}
        </div>
      </div>

      {/* Tab content */}
      <div className={tab === "overview" ? "p-6" : "flex-1 overflow-y-auto p-6"}>
        {tab === "overview" && (
          <div className="space-y-4">
            {/* FINDINGS (above readme, only when filled) */}
            {experiment.zayne_findings && (
              <div className="border-l-4 border-emerald-500 bg-emerald-950/30 rounded-r p-4">
                <span className="text-[10px] font-bold uppercase tracking-wider text-emerald-400">Findings</span>
                <div className="mt-2">
                  <Markdown content={experiment.zayne_findings} />
                </div>
              </div>
            )}

            {/* Researcher's README */}
            {experiment.zayne_readme && (
              <div className="border-l-4 border-amber-500 bg-gray-900/80 rounded-r p-4">
                <span className="text-[10px] font-bold uppercase tracking-wider text-amber-400">Researcher's README</span>
                <div className="mt-2">
                  <Markdown content={experiment.zayne_readme} />
                </div>
              </div>
            )}

            {/* DECISIONS (below readme, only when filled) */}
            {experiment.zayne_decisions && (
              <div className="border-l-4 border-violet-500 bg-violet-950/30 rounded-r p-4">
                <span className="text-[10px] font-bold uppercase tracking-wider text-violet-400">Decisions</span>
                <div className="mt-2">
                  <Markdown content={experiment.zayne_decisions} />
                </div>
              </div>
            )}

            {/* Agent notes (EXPERIMENT_README), shown whenever present */}
            {experiment.notes && (
              <div className="border-l-4 border-cyan-500 bg-gray-900/80 rounded-r p-4">
                <span className="text-[10px] font-bold uppercase tracking-wider text-cyan-400">Experiment Notes</span>
                <div className="mt-2">
                  <Markdown content={experiment.notes} />
                </div>
              </div>
            )}
          </div>
        )}

        {tab === "artifacts" && (
          <ArtifactsTab
            artifacts={experiment.artifacts || []}
            hfRepos={experiment.hf_repos || []}
            runs={experiment.runs || []}
            onOpenArtifact={(artifact: Artifact) => {
              // Types with an inline viewer are not navigated anywhere from here.
              // "heatmap" is listed to match the inline viewers ArtifactsTab
              // renders; it has no visualizer-page mapping below either way.
              const INLINE_TYPES = new Set(["table", "yaml_config", "plotly", "image", "heatmap"]);
              if (!artifact.visualizer_type || INLINE_TYPES.has(artifact.visualizer_type)) {
                return;
              }
              // Visualizer type → tab of the "viz" page that can display it.
              const vizTabMap: Record<string, string> = {
                model_trace: "model",
              };
              const vizTab = vizTabMap[artifact.visualizer_type];
              if (vizTab) {
                // Dataset names without an org prefix are qualified with HF_ORG.
                const fullName = artifact.dataset_name.includes("/")
                  ? artifact.dataset_name
                  : `${HF_ORG}/${artifact.dataset_name}`;
                navigateTo({
                  page: "viz",
                  tab: vizTab,
                  params: new URLSearchParams({
                    repos: fullName,
                    from_exp: experiment.id,
                  }),
                });
              }
            }}
          />
        )}

        {tab === "red_team_brief" && experiment.red_team_brief && (
          <div>
            <div className="border-l-4 border-cyan-500 bg-gray-900/80 rounded-r p-6">
              <div className="flex items-center gap-2 mb-4">
                <span className="text-red-400 text-lg">&#9888;</span>
                <span className="text-[10px] font-bold uppercase tracking-wider text-cyan-400">Claude Code: Red Team Brief</span>
              </div>
              <Markdown content={experiment.red_team_brief} />
            </div>
          </div>
        )}

        {tab === "notes" && (
          <div>
            <div className="flex justify-between items-center mb-4">
              <h2 className="text-sm font-medium text-gray-300">Project Files</h2>
              <span className="text-xs text-gray-500">
                {experiment.experiment_notes?.length || 0} files
              </span>
            </div>

            {(experiment.experiment_notes || []).length === 0 ? (
              <p className="text-sm text-gray-500">No project files found.</p>
            ) : (
              <div className="space-y-4">
                {[...groupNotesByDir(experiment.experiment_notes)].map(([dir, files]) => (
                  <div key={dir}>
                    <div className="flex items-center gap-2 mb-2">
                      <span className="text-xs text-cyan-400/70 font-mono">{dir}/</span>
                      <span className="text-xs text-gray-600">({files.length})</span>
                    </div>
                    <div className="grid gap-1 ml-3">
                      {files.map((note) => (
                        <button
                          key={note.id}
                          onClick={() => onSelectNote(note.id)}
                          className="w-full text-left bg-gray-900 hover:bg-gray-800 border border-gray-800 hover:border-gray-700 rounded px-3 py-2 transition-colors group"
                        >
                          <div className="flex items-center justify-between">
                            <div className="flex items-center gap-2">
                              <span className="text-xs text-gray-500">
                                {/* Gear for YAML files, "menu" glyph otherwise. Unicode
                                    escapes are required here: HTML entities are only decoded
                                    in JSX text, not inside JavaScript string literals. */}
                                {note.filename.endsWith(".yaml") || note.filename.endsWith(".yml") ? "\u2699" : "\u2630"}
                              </span>
                              <span className="text-sm text-gray-300 group-hover:text-gray-200 font-mono">
                                {note.filename}
                              </span>
                            </div>
                          </div>
                        </button>
                      ))}
                    </div>
                  </div>
                ))}
              </div>
            )}
          </div>
        )}

        {tab === "timeline" && (
          <TimelineTab
            entries={experiment.activity_log || []}
            onArtifactClick={(datasetName) => console.log("artifact clicked:", datasetName)}
          />
        )}

        {tab === "live" && (
          <div className="space-y-6">
            {/* Unreachable clusters warning */}
            {experiment.unreachable_clusters && Object.keys(experiment.unreachable_clusters).length > 0 && (
              <div className="bg-orange-900/20 border border-orange-800/50 rounded-lg p-3">
                <h3 className="text-xs font-medium text-orange-400 uppercase tracking-wide mb-2">Unreachable Clusters</h3>
                <div className="space-y-1">
                  {Object.entries(experiment.unreachable_clusters).map(([cluster, info]) => (
                    <div key={cluster} className="flex items-center justify-between text-sm">
                      <span className="text-orange-300 font-medium">{cluster}</span>
                      <span className="text-orange-400/70 text-xs">
                        {info.reason} (since {new Date(info.since).toLocaleString()})
                      </span>
                    </div>
                  ))}
                </div>
              </div>
            )}

            {/* Jobs table */}
            <div>
              <h2 className="text-sm font-medium text-gray-300 mb-3">Active Jobs</h2>
              {liveJobCount === 0 ? (
                <p className="text-sm text-gray-500">No live jobs tracked.</p>
              ) : (
                <div className="overflow-x-auto">
                  <table className="w-full text-sm">
                    <thead>
                      <tr className="text-xs text-gray-500 uppercase tracking-wide border-b border-gray-800">
                        <th className="text-left py-2 px-2">ID</th>
                        <th className="text-left py-2 px-2">Cluster</th>
                        <th className="text-left py-2 px-2">GPUs</th>
                        <th className="text-left py-2 px-2">Status</th>
                        <th className="text-left py-2 px-2">Message</th>
                        <th className="text-left py-2 px-2">ETA</th>
                      </tr>
                    </thead>
                    <tbody>
                      {Object.entries(experiment.live_jobs || {}).map(([jobId, job]) => (
                        <tr
                          key={jobId}
                          className={`border-b border-gray-800/50 ${
                            job.status === "blocked" ? "bg-red-900/10" :
                            job.status === "failed" ? "bg-red-900/10" :
                            "hover:bg-gray-900/50"
                          }`}
                        >
                          <td className="py-2 px-2 text-gray-300 font-mono text-xs">
                            {jobId}
                            {job.slurm_job_id && (
                              <span className="text-gray-600 ml-1">({job.slurm_job_id})</span>
                            )}
                          </td>
                          <td className="py-2 px-2 text-gray-400">
                            {job.cluster}
                            {job.partition && (
                              <span className="text-gray-600 text-xs ml-1">/{job.partition}</span>
                            )}
                          </td>
                          <td className="py-2 px-2 text-gray-400">{job.gpus}</td>
                          <td className={`py-2 px-2 font-medium ${LIVE_JOB_STATUS_COLORS[job.status] || "text-gray-400"}`}>
                            {job.status}
                            {job.blocker && (
                              <span className="text-red-400/70 text-xs ml-1">
                                ({job.blocker.reason})
                              </span>
                            )}
                          </td>
                          <td className="py-2 px-2 text-gray-400 text-xs max-w-xs truncate">
                            {job.message || "-"}
                          </td>
                          <td className="py-2 px-2 text-gray-500 text-xs">
                            {job.estimated_completion
                              ? new Date(job.estimated_completion).toLocaleString()
                              : "-"}
                          </td>
                        </tr>
                      ))}
                    </tbody>
                  </table>
                </div>
              )}
            </div>

            {/* Job metrics summary: one card per job that reports at least one metric */}
            {liveJobCount > 0 && (
              <div>
                <h2 className="text-sm font-medium text-gray-300 mb-3">Job Metrics</h2>
                <div className="grid grid-cols-2 md:grid-cols-4 gap-2">
                  {Object.entries(experiment.live_jobs || {}).map(([jobId, job]) =>
                    Object.keys(job.metrics || {}).length > 0 ? (
                      <div key={jobId} className="bg-gray-900 rounded p-3 border border-gray-800">
                        <span className="text-xs text-gray-500 font-mono">{jobId}</span>
                        <div className="mt-1 space-y-0.5">
                          {Object.entries(job.metrics).map(([k, v]) => (
                            <div key={k} className="flex justify-between text-xs">
                              <span className="text-gray-400">{k}</span>
                              <span className="text-gray-300 font-mono">
                                {typeof v === "number" ? v.toFixed(3) : v}
                              </span>
                            </div>
                          ))}
                        </div>
                      </div>
                    ) : null
                  )}
                </div>
              </div>
            )}

            {/* Event timeline: the ten most recent history entries, newest first */}
            <div>
              <h2 className="text-sm font-medium text-gray-300 mb-3">Event Timeline</h2>
              {(!experiment.live_history || experiment.live_history.length === 0) ? (
                <p className="text-sm text-gray-500">No timeline events recorded.</p>
              ) : (
                <div className="space-y-0">
                  {experiment.live_history.slice(-10).reverse().map((entry, i) => {
                    const eventLower = entry.event.toLowerCase();
                    // Exact event names use the colour table; otherwise any name
                    // mentioning block/fail/error is red and the rest are gray.
                    const dotColor =
                      TIMELINE_DOT_COLORS[eventLower] ||
                      (eventLower.includes("block") || eventLower.includes("fail") || eventLower.includes("error")
                        ? "bg-red-500"
                        : "bg-gray-500");
                    return (
                      <div key={i} className="flex items-start gap-3 py-2 border-l-2 border-gray-800 pl-4 relative">
                        <div className={`absolute -left-[5px] top-3 w-2 h-2 rounded-full ${dotColor}`} />
                        <div className="flex-1 min-w-0">
                          <div className="flex items-center gap-2">
                            <span className="text-xs font-medium text-gray-300">{entry.event}</span>
                            {entry.cluster && (
                              <span className="text-xs text-gray-500">{entry.cluster}</span>
                            )}
                            {entry.job_id && (
                              <span className="text-xs text-gray-600 font-mono">{entry.job_id}</span>
                            )}
                          </div>
                          {entry.message && (
                            <p className="text-xs text-gray-400 mt-0.5 truncate">{entry.message}</p>
                          )}
                          <span className="text-xs text-gray-600">
                            {new Date(entry.timestamp).toLocaleString()}
                          </span>
                        </div>
                      </div>
                    );
                  })}
                </div>
              )}
            </div>

            {/* Timestamps */}
            {(experiment.live_started_at || experiment.live_updated_at) && (
              <div className="flex gap-6 text-xs text-gray-600 border-t border-gray-800 pt-3">
                {experiment.live_started_at && (
                  <span>Started: {new Date(experiment.live_started_at).toLocaleString()}</span>
                )}
                {experiment.live_updated_at && (
                  <span>Last update: {new Date(experiment.live_updated_at).toLocaleString()}</span>
                )}
              </div>
            )}
          </div>
        )}
      </div>
    </div>
  );
}
frontend/src/experiments/components/ExperimentList.tsx ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from "react";
2
+ import type { Experiment, LiveJobState } from "../types";
3
+ import { deriveDisplayStatus, statusBadgeColor } from "../types";
4
+
5
/**
 * Builds the one-line job summary shown on an experiment card.
 *
 * - running jobs are reported as "<running>/<total> running"
 * - blocked jobs are reported as "<n> blocked"
 * - completed jobs are reported only when nothing is running or blocked
 *
 * @returns The parts joined with ", ", or "" when there are no jobs or
 *          none of the cases above applies.
 */
function jobSummaryText(jobs: Record<string, LiveJobState>): string {
  const statuses = Object.values(jobs).map((job) => job.status as string);
  const total = statuses.length;
  if (total === 0) {
    return "";
  }

  const countOf = (wanted: string): number =>
    statuses.reduce((count, status) => (status === wanted ? count + 1 : count), 0);

  const runningCount = countOf("running");
  const blockedCount = countOf("blocked");
  const completedCount = countOf("completed");

  const pieces: string[] = [];
  if (runningCount > 0) {
    pieces.push(`${runningCount}/${total} running`);
  }
  if (blockedCount > 0) {
    pieces.push(`${blockedCount} blocked`);
  }
  const nothingInFlight = runningCount === 0 && blockedCount === 0;
  if (nothingInFlight && completedCount > 0) {
    pieces.push(`${completedCount} completed`);
  }
  return pieces.join(", ");
}
17
+
18
/** Orderings offered by the experiment list's sort dropdown. */
type SortKey =
  | "updated"
  | "name"
  | "completeness";
19
+
20
/** Props accepted by the ExperimentList component. */
interface Props {
  /** Experiments to filter, sort and display as cards. */
  experiments: Experiment[];
  /** Called with the experiment's id when its card is clicked. */
  onSelect: (id: string) => void;
  /** Refresh callback supplied by the parent. */
  onRefresh: () => void;
}
25
+
26
/**
 * Experiment overview page: a header with filter and sort dropdowns, followed
 * by one clickable card per experiment.
 *
 * An experiment is treated as "finished" once it has `zayne_findings`;
 * everything else counts as "active".
 */
export default function ExperimentList({ experiments, onSelect, onRefresh }: Props) {
  const [sortBy, setSortBy] = useState<SortKey>("updated");
  const [filterFinished, setFilterFinished] = useState<"all" | "finished" | "active">("all");

  // Keep an experiment when its finished-ness matches the selected filter.
  const matchesFilter = (exp: Experiment): boolean => {
    if (filterFinished === "all") return true;
    const finished = Boolean(exp.zayne_findings);
    return (filterFinished === "finished") === finished;
  };

  // Comparator for the selected ordering: newest update first, name
  // ascending, or highest completeness first.
  const compareExperiments = (a: Experiment, b: Experiment): number => {
    if (sortBy === "updated") return (b.updated || "").localeCompare(a.updated || "");
    if (sortBy === "name") return a.name.localeCompare(b.name);
    if (sortBy === "completeness") return (b.completeness || 0) - (a.completeness || 0);
    return 0;
  };

  // filter() returns a fresh array, but copy anyway so sort() never touches it in place.
  const sorted = experiments.filter(matchesFilter).slice().sort(compareExperiments);

  /** Renders the card for a single experiment. */
  const renderCard = (exp: Experiment) => {
    const isFinished = !!exp.zayne_findings;
    const hasJobs = exp.live_jobs && Object.keys(exp.live_jobs).length > 0;
    const updatedLabel = exp.updated ? new Date(exp.updated).toLocaleDateString() : "";

    return (
      <button
        key={exp.id}
        onClick={() => onSelect(exp.id)}
        className="w-full text-left bg-gray-900 hover:bg-gray-800 border border-gray-800 hover:border-gray-700 rounded-lg p-4 transition-colors"
      >
        <div className="flex items-start justify-between">
          <div className="flex-1 min-w-0">
            <div className="flex items-center gap-2 mb-1">
              {isFinished && (
                <span className="text-xs px-2 py-0.5 rounded-full font-medium bg-emerald-900 text-emerald-300 border border-emerald-700">
                  Finished
                </span>
              )}
              <h3 className="text-sm font-medium text-gray-200 truncate">
                {exp.name}
              </h3>
              {exp.live_status && !isFinished && (
                <span className={`text-xs px-2 py-0.5 rounded-full font-medium ${statusBadgeColor(deriveDisplayStatus(exp))}`}>
                  {deriveDisplayStatus(exp).replace("_", " ")}
                </span>
              )}
            </div>
            {exp.live_message && (
              <p className="text-xs text-cyan-400/80 mt-0.5 truncate">
                {exp.live_message}
              </p>
            )}
            {/* The researcher's summary wins; otherwise show the hypothesis statement */}
            {exp.zayne_summary ? (
              <div className="mt-1">
                <span className="text-[9px] font-bold uppercase tracking-wider text-amber-400/70">Researcher's Summary</span>
                <p className="text-xs text-gray-300 line-clamp-2">
                  {exp.zayne_summary}
                </p>
              </div>
            ) : exp.hypothesis?.statement ? (
              <p className="text-xs text-gray-400 mt-1 line-clamp-2 italic">
                {exp.hypothesis.statement}
              </p>
            ) : null}
            {hasJobs && (
              <div className="flex items-center gap-3 mt-2">
                <span className="text-xs text-gray-400">
                  {jobSummaryText(exp.live_jobs)}
                </span>
              </div>
            )}
          </div>
          <div className="flex flex-col items-end ml-4 shrink-0">
            <span className="text-xs text-gray-600">
              {updatedLabel}
            </span>
          </div>
        </div>
      </button>
    );
  };

  const selectClass = "bg-gray-800 text-gray-300 text-sm rounded px-2 py-1.5 border border-gray-700";

  return (
    <div className="h-full flex flex-col">
      {/* Header */}
      <div className="flex items-center justify-between px-6 py-4 border-b border-gray-800">
        <h1 className="text-lg font-semibold text-gray-200">Experiments</h1>
        <div className="flex items-center gap-3">
          {/* Filter */}
          <select
            value={filterFinished}
            onChange={(event) => setFilterFinished(event.target.value as "all" | "finished" | "active")}
            className={selectClass}
          >
            <option value="all">All</option>
            <option value="active">Active</option>
            <option value="finished">Finished</option>
          </select>

          {/* Sort */}
          <select
            value={sortBy}
            onChange={(event) => setSortBy(event.target.value as SortKey)}
            className={selectClass}
          >
            <option value="updated">Last Updated</option>
            <option value="name">Name</option>
            <option value="completeness">Completeness</option>
          </select>
        </div>
      </div>

      {/* Experiment cards */}
      <div className="flex-1 overflow-y-auto p-6">
        {sorted.length === 0 ? (
          <div className="flex flex-col items-center justify-center h-64 text-gray-500">
            <p className="text-lg mb-2">No experiments found</p>
            <p className="text-sm">Try a different filter</p>
          </div>
        ) : (
          <div className="grid gap-3">
            {sorted.map((exp) => renderCard(exp))}
          </div>
        )}
      </div>
    </div>
  );
}
frontend/src/experiments/components/Markdown.tsx ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ReactMarkdown from "react-markdown";
2
+ import remarkGfm from "remark-gfm";
3
+ import remarkMath from "remark-math";
4
+ import rehypeKatex from "rehype-katex";
5
+ import "katex/dist/katex.min.css";
6
+
7
/** Props accepted by the Markdown renderer. */
interface Props {
  /** Markdown source text to render. */
  content: string;
}
10
+
11
/**
 * Renders a Markdown string with the dashboard's dark-theme styling.
 *
 * GitHub-flavoured Markdown (remark-gfm) and math (remark-math + rehype-katex)
 * are enabled. Every element is mapped to a Tailwind-styled equivalent, and
 * links open in a new tab.
 *
 * @param content Markdown source text.
 */
export default function Markdown({ content }: Props) {
  return (
    <div className="break-words" style={{ overflowWrap: "anywhere" }}>
      <ReactMarkdown
        remarkPlugins={[remarkGfm, remarkMath]}
        rehypePlugins={[rehypeKatex]}
        components={{
          h1: ({ children }) => <h1 className="text-xl font-bold text-gray-200 mb-3 mt-6 first:mt-0">{children}</h1>,
          h2: ({ children }) => <h2 className="text-lg font-semibold text-gray-200 mb-2 mt-5 first:mt-0">{children}</h2>,
          h3: ({ children }) => <h3 className="text-base font-semibold text-gray-300 mb-2 mt-4">{children}</h3>,
          h4: ({ children }) => <h4 className="text-sm font-semibold text-gray-300 mb-1 mt-3">{children}</h4>,
          p: ({ children }) => <p className="text-sm text-gray-300 mb-2 leading-relaxed whitespace-pre-wrap break-words">{children}</p>,
          ul: ({ children }) => <ul className="list-disc list-inside text-sm text-gray-300 mb-2 space-y-0.5 ml-2">{children}</ul>,
          ol: ({ children }) => <ol className="list-decimal list-inside text-sm text-gray-300 mb-2 space-y-0.5 ml-2">{children}</ol>,
          li: ({ children }) => <li className="text-sm text-gray-300">{children}</li>,
          a: ({ href, children }) => (
            <a href={href} target="_blank" rel="noopener noreferrer" className="text-cyan-400 hover:text-cyan-300 underline">
              {children}
            </a>
          ),
          code: ({ className, children }) => {
            // A fenced block either carries a `language-*` class or, when the
            // fence has no language tag, is recognisable by the newline in its
            // text (inline code spans never contain one). Without the second
            // check, untagged fences were rendered as inline code and lost
            // their <pre> wrapper, collapsing all line breaks.
            const isBlock =
              Boolean(className?.includes("language-")) || String(children).includes("\n");
            if (isBlock) {
              return (
                <pre className="bg-gray-950 border border-gray-800 rounded p-3 mb-2 overflow-x-auto">
                  <code className="text-xs text-gray-300 font-mono">{children}</code>
                </pre>
              );
            }
            return <code className="bg-gray-800 text-cyan-300 text-xs px-1 py-0.5 rounded font-mono">{children}</code>;
          },
          // The `code` renderer above emits its own <pre>, so the default wrapper is dropped.
          pre: ({ children }) => <>{children}</>,
          blockquote: ({ children }) => (
            <blockquote className="border-l-2 border-gray-600 pl-3 my-2 text-sm text-gray-400 italic">{children}</blockquote>
          ),
          hr: () => <hr className="border-gray-700 my-4" />,
          strong: ({ children }) => <strong className="text-gray-200 font-semibold">{children}</strong>,
          em: ({ children }) => <em className="text-gray-400">{children}</em>,
          table: ({ children }) => (
            // Wide tables scroll horizontally instead of overflowing the page.
            <div className="overflow-x-auto mb-3">
              <table className="w-full text-sm border-collapse">{children}</table>
            </div>
          ),
          thead: ({ children }) => <thead className="border-b border-gray-700">{children}</thead>,
          tbody: ({ children }) => <tbody>{children}</tbody>,
          tr: ({ children }) => <tr className="border-b border-gray-800/50">{children}</tr>,
          th: ({ children }) => <th className="text-left py-1.5 px-2 text-xs text-gray-400 uppercase tracking-wide font-medium">{children}</th>,
          td: ({ children }) => <td className="py-1.5 px-2 text-sm text-gray-300">{children}</td>,
        }}
      >
        {content}
      </ReactMarkdown>
    </div>
  );
}
frontend/src/experiments/components/NoteView.tsx ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { ExperimentNote } from "../types";
2
+ import Markdown from "./Markdown";
3
+
4
/** Props accepted by the note view. */
interface Props {
  /** Note to render; its title, relative path and markdown body are displayed. */
  note: ExperimentNote;
  /** Label shown on the breadcrumb's back button. */
  experimentName: string;
  /** Called when the breadcrumb's back button is clicked. */
  onBack: () => void;
}
9
+
10
/**
 * Read-only page for a single experiment note.
 *
 * Shows a breadcrumb whose button calls `onBack`, the note title, the note's
 * relative path when it is set, and the markdown body (or a placeholder when
 * the note has no content).
 */
export default function NoteView({ note, experimentName, onBack }: Props) {
  const { title, relative_path: relativePath, content_md: markdown } = note;

  const breadcrumb = (
    <div className="flex items-center gap-2 text-sm mb-3">
      <button onClick={onBack} className="text-gray-400 hover:text-gray-200 transition-colors">
        &larr; {experimentName}
      </button>
      <span className="text-gray-600">/</span>
      <span className="text-gray-300">{title}</span>
    </div>
  );

  const body = markdown ? (
    <Markdown content={markdown} />
  ) : (
    <span className="text-sm text-gray-600 italic">No content.</span>
  );

  return (
    <div className="h-full flex flex-col">
      {/* Breadcrumb + header */}
      <div className="px-6 py-4 border-b border-gray-800">
        {breadcrumb}
        <h1 className="text-lg font-semibold text-gray-200">{title}</h1>
        {relativePath && (
          <p className="text-xs text-gray-500 mt-1 font-mono">{relativePath}</p>
        )}
      </div>

      {/* Content */}
      <div className="flex-1 overflow-y-auto p-6">
        <div className="border-l-4 border-cyan-500 bg-gray-900/80 rounded-r p-6">
          <span className="text-[10px] font-bold uppercase tracking-wider text-cyan-400">Claude Code</span>
          <div className="mt-3">{body}</div>
        </div>
      </div>
    </div>
  );
}
frontend/src/experiments/components/SubExperimentView.tsx ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from "react";
2
+ import type { SubExperiment } from "../types";
3
+ import { experimentsApi } from "../api";
4
+ import Markdown from "./Markdown";
5
+
6
/** Props accepted by the sub-experiment view. */
interface Props {
  /** Sub-experiment to display and edit. */
  sub: SubExperiment;
  /** Label shown on the breadcrumb's back button. */
  experimentName: string;
  /** Called when the breadcrumb's back button is clicked. */
  onBack: () => void;
  /** Called after a successful save so the owner can reload its data. */
  onRefresh: () => void;
}
12
+
13
/**
 * Detail page for a sub-experiment with an inline editor.
 *
 * View mode renders everything straight from the `sub` prop. Edit mode works
 * on local drafts (report markdown, hypothesis, status) that are re-seeded from
 * `sub` whenever editing starts or is cancelled, so a cancelled edit never
 * leaks into what is displayed.
 *
 * Saving calls `experimentsApi.updateSub`; on success the editor closes and
 * `onRefresh` is invoked. On failure the editor stays open and the error
 * message is shown next to the buttons.
 */
export default function SubExperimentView({ sub, experimentName, onBack, onRefresh }: Props) {
  const [editing, setEditing] = useState(false);
  const [content, setContent] = useState(sub.content_md || "");
  const [hypothesis, setHypothesis] = useState(sub.hypothesis || "");
  const [status, setStatus] = useState(sub.status || "active");
  const [saving, setSaving] = useState(false);
  const [saveError, setSaveError] = useState<string | null>(null);

  // Discard all drafts and start again from what the `sub` prop currently holds.
  const resetDrafts = () => {
    setContent(sub.content_md || "");
    setHypothesis(sub.hypothesis || "");
    setStatus(sub.status || "active");
    setSaveError(null);
  };

  const startEditing = () => {
    // Re-seed here as well: `sub` may have been refreshed since mount.
    resetDrafts();
    setEditing(true);
  };

  const cancelEditing = () => {
    resetDrafts();
    setEditing(false);
  };

  const handleSave = async () => {
    setSaving(true);
    setSaveError(null);
    try {
      await experimentsApi.updateSub(sub.experiment_id, sub.id, {
        content_md: content,
        hypothesis,
        status,
      });
      setEditing(false);
      onRefresh();
    } catch (err: unknown) {
      // Keep the editor open so the drafts are not lost, and tell the user why.
      setSaveError(err instanceof Error ? err.message : "Failed to save changes.");
    } finally {
      setSaving(false);
    }
  };

  // The view-mode badge reflects the persisted status, like hypothesis and content.
  const displayedStatus = sub.status || "active";

  return (
    <div className="h-full flex flex-col">
      {/* Breadcrumb + header */}
      <div className="px-6 py-4 border-b border-gray-800">
        <div className="flex items-center gap-2 text-sm mb-3">
          <button onClick={onBack} className="text-gray-400 hover:text-gray-200 transition-colors">
            &larr; {experimentName}
          </button>
          <span className="text-gray-600">/</span>
          <span className="text-gray-300">{sub.name}</span>
        </div>

        <div className="flex items-start justify-between">
          <div>
            <h1 className="text-lg font-semibold text-gray-200">{sub.name}</h1>
            {editing ? (
              <div className="flex items-center gap-2 mt-2">
                <input
                  value={hypothesis}
                  onChange={(e) => setHypothesis(e.target.value)}
                  placeholder="Hypothesis"
                  className="bg-gray-900 text-gray-200 text-sm rounded px-2 py-1 border border-gray-700 outline-none flex-1"
                />
                <select
                  value={status}
                  onChange={(e) => setStatus(e.target.value)}
                  className="bg-gray-800 text-gray-300 text-xs rounded px-2 py-1 border border-gray-700"
                >
                  <option value="active">active</option>
                  <option value="concluded">concluded</option>
                  <option value="inconclusive">inconclusive</option>
                </select>
              </div>
            ) : (
              <div className="flex items-center gap-3 mt-1">
                {sub.hypothesis && (
                  <p className="text-sm text-gray-400">{sub.hypothesis}</p>
                )}
                <span className={`text-xs px-2 py-0.5 rounded-full text-white ${
                  displayedStatus === "concluded" ? "bg-green-600" :
                  displayedStatus === "active" ? "bg-yellow-600" : "bg-gray-600"
                }`}>
                  {displayedStatus}
                </span>
              </div>
            )}
          </div>
          <div className="flex flex-col items-end gap-1">
            <div className="flex gap-2">
              {editing ? (
                <>
                  <button onClick={cancelEditing}
                    className="text-gray-400 hover:text-gray-200 text-sm px-3 py-1.5 rounded transition-colors">
                    Cancel
                  </button>
                  <button onClick={handleSave} disabled={saving}
                    className="bg-cyan-600 hover:bg-cyan-500 text-white text-sm font-medium px-3 py-1.5 rounded transition-colors disabled:opacity-50 disabled:cursor-not-allowed">
                    {saving ? "Saving..." : "Save"}
                  </button>
                </>
              ) : (
                <button onClick={startEditing}
                  className="text-gray-400 hover:text-gray-200 text-sm px-3 py-1.5 rounded border border-gray-700 transition-colors">
                  Edit
                </button>
              )}
            </div>
            {saveError && (
              <p role="alert" className="text-xs text-red-400">{saveError}</p>
            )}
          </div>
        </div>
      </div>

      {/* Content */}
      <div className="flex-1 overflow-y-auto p-6">
        <div className="max-w-3xl">
          {editing ? (
            <textarea
              value={content}
              onChange={(e) => setContent(e.target.value)}
              className="w-full bg-gray-900 text-gray-200 text-sm rounded px-4 py-3 border border-gray-700 focus:border-cyan-500 outline-none resize-y font-mono"
              rows={30}
              placeholder="Write your sub-experiment report in markdown..."
            />
          ) : (
            <div className="bg-gray-900 rounded p-6 min-h-[300px]">
              {sub.content_md ? (
                <Markdown content={sub.content_md} />
              ) : (
                <span className="text-sm text-gray-600 italic">No content yet. Click Edit to add your sub-experiment report.</span>
              )}
            </div>
          )}

          {/* HF Repos */}
          {(sub.hf_repos || []).length > 0 && (
            <div className="mt-6">
              <span className="text-xs text-gray-500 uppercase tracking-wide">Linked Datasets</span>
              <div className="grid gap-2 mt-2">
                {sub.hf_repos.map((repo, i) => (
                  <a
                    key={`${repo.repo}-${i}`}
                    href={`https://huggingface.co/datasets/${repo.repo}`}
                    target="_blank"
                    rel="noopener noreferrer"
                    className="text-cyan-400 hover:text-cyan-300 text-sm"
                  >
                    {repo.repo}
                  </a>
                ))}
              </div>
            </div>
          )}

          {/* Timestamps */}
          <div className="mt-6 flex gap-4 text-xs text-gray-600">
            {sub.created && <span>Created: {new Date(sub.created).toLocaleDateString()}</span>}
            {sub.updated && <span>Updated: {new Date(sub.updated).toLocaleDateString()}</span>}
          </div>
        </div>
      </div>
    </div>
  );
}
frontend/src/experiments/components/SummaryFindingsView.tsx ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Markdown from "./Markdown";
2
+
3
/** Props accepted by the findings summary view. */
interface Props {
  /** Markdown source of the findings; a placeholder is shown when it is empty. */
  content: string;
  /** Called when the breadcrumb's back button is clicked. */
  onBack: () => void;
}
7
+
8
/**
 * Page showing the cross-experiment findings document.
 *
 * Renders a breadcrumb whose button calls `onBack`, a fixed heading and
 * subtitle, and the markdown `content` (or a placeholder when it is empty).
 */
export default function SummaryFindingsView({ content, onBack }: Props) {
  const heading = "Findings & Summary";

  const findings = content ? (
    <Markdown content={content} />
  ) : (
    <span className="text-sm text-gray-600 italic">No findings written yet.</span>
  );

  return (
    <div className="h-full flex flex-col">
      {/* Header */}
      <div className="px-6 py-4 border-b border-gray-800">
        <div className="flex items-center gap-2 text-sm mb-3">
          <button onClick={onBack} className="text-gray-400 hover:text-gray-200 transition-colors">
            &larr; Experiments
          </button>
          <span className="text-gray-600">/</span>
          <span className="text-gray-300">{heading}</span>
        </div>
        <h1 className="text-lg font-semibold text-gray-200">{heading}</h1>
        <p className="text-xs text-gray-500 mt-1">Definitive statements on what has been learned from experiments</p>
      </div>

      {/* Content */}
      <div className="flex-1 overflow-y-auto p-6">
        <div className="max-w-4xl">
          <div className="bg-gray-900 rounded p-6">{findings}</div>
        </div>
      </div>
    </div>
  );
}
frontend/src/experiments/components/TimelineTab.tsx ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useMemo } from "react";
2
+ import type { ActivityLogEntry, ActivityEntryType } from "../types";
3
+
4
/** Props accepted by the timeline tab. */
interface TimelineTabProps {
  /** Activity log entries to list; the tab sorts and filters them itself. */
  entries: ActivityLogEntry[];
  /** Called with the artifact name when one of an entry's artifact chips is clicked. */
  onArtifactClick: (datasetName: string) => void;
}
8
+
9
/**
 * Picks the Tailwind badge classes for a scope label.
 *
 * The scopes "debug", "cross-run" and "meta" have fixed colors. Any other
 * scope is hashed (16-bit rolling hash over its UTF-16 code units) into an
 * eight-entry palette, so the same label always gets the same color.
 */
function scopeColor(scope: string): string {
  switch (scope) {
    case "debug":
      return "bg-gray-700 text-gray-300";
    case "cross-run":
      return "bg-purple-900/60 text-purple-300";
    case "meta":
      return "bg-blue-900/60 text-blue-300";
  }

  const palette = [
    "bg-emerald-900/60 text-emerald-300",
    "bg-amber-900/60 text-amber-300",
    "bg-rose-900/60 text-rose-300",
    "bg-teal-900/60 text-teal-300",
    "bg-indigo-900/60 text-indigo-300",
    "bg-fuchsia-900/60 text-fuchsia-300",
    "bg-orange-900/60 text-orange-300",
    "bg-lime-900/60 text-lime-300",
  ];

  // split("") yields UTF-16 code units, matching an index-based charCodeAt loop.
  const hash = scope
    .split("")
    .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) & 0xffff, 0);

  return palette[hash % palette.length];
}
32
+
33
/** Glyph shown for each activity entry type (filter chips and timeline rows). */
const TYPE_ICONS: Record<ActivityEntryType, string> = {
  action: "▶",
  result: "◆",
  note: "✎",
  milestone: "⚑",
};

/** Entry types in the order their filter chips are rendered. */
const TYPE_LABELS: ActivityEntryType[] = [
  "action",
  "result",
  "note",
  "milestone",
];
41
+
42
/**
 * Formats an ISO timestamp as a coarse "time ago" label.
 *
 * @param iso - Timestamp string parseable by `Date`.
 * @returns `"<n>s ago"`, `"<n>m ago"`, `"<n>h ago"` or `"<n>d ago"`, using the
 *   largest unit that fits. Unparseable input is returned unchanged.
 *   Timestamps in the future (e.g. client/server clock skew) are reported as
 *   `"0s ago"` instead of a negative duration.
 */
function relativeTime(iso: string): string {
  const then = new Date(iso).getTime();
  if (Number.isNaN(then)) return iso;

  // Clamp so a slightly-ahead clock never yields labels like "-3s ago".
  const elapsedMs = Math.max(0, Date.now() - then);

  const seconds = Math.floor(elapsedMs / 1000);
  if (seconds < 60) return `${seconds}s ago`;

  const minutes = Math.floor(seconds / 60);
  if (minutes < 60) return `${minutes}m ago`;

  const hours = Math.floor(minutes / 60);
  if (hours < 24) return `${hours}h ago`;

  return `${Math.floor(hours / 24)}d ago`;
}
57
+
58
/** Parses a timestamp to epoch milliseconds; unparseable input sorts as oldest. */
function timelineSortKey(iso: string): number {
  const ms = new Date(iso).getTime();
  return Number.isNaN(ms) ? Number.NEGATIVE_INFINITY : ms;
}

/**
 * Tooltip text for a timestamp: the normalized ISO form, or the raw string
 * when it cannot be parsed (`Date#toISOString` throws on an invalid date).
 */
function timelineTooltip(iso: string): string {
  const parsed = new Date(iso);
  return Number.isNaN(parsed.getTime()) ? iso : parsed.toISOString();
}

/**
 * Activity timeline with scope and entry-type filters.
 *
 * Entries are listed newest first; entries whose timestamp cannot be parsed
 * are placed last and shown with their raw timestamp instead of crashing the
 * render. At least one entry type always stays selected. Clicking an artifact
 * chip calls `onArtifactClick` with the artifact's name.
 */
export default function TimelineTab({ entries, onArtifactClick }: TimelineTabProps) {
  const [scopeFilter, setScopeFilter] = useState<string>("all");
  const [activeTypes, setActiveTypes] = useState<Set<ActivityEntryType>>(
    new Set(TYPE_LABELS)
  );

  // Sorted most-recent-first (ensure stable order even if backend varies).
  // Keys are computed once per entry so the comparator never sees NaN.
  const sorted = useMemo(
    () =>
      entries
        .map((entry) => ({ entry, at: timelineSortKey(entry.timestamp) }))
        .sort((a, b) => (a.at === b.at ? 0 : b.at > a.at ? 1 : -1))
        .map(({ entry }) => entry),
    [entries]
  );

  const uniqueScopes = useMemo(() => {
    const set = new Set<string>();
    entries.forEach((e) => set.add(e.scope));
    return Array.from(set).sort();
  }, [entries]);

  const filtered = useMemo(
    () =>
      sorted.filter(
        (e) =>
          (scopeFilter === "all" || e.scope === scopeFilter) &&
          activeTypes.has(e.type)
      ),
    [sorted, scopeFilter, activeTypes]
  );

  function toggleType(t: ActivityEntryType) {
    setActiveTypes((prev) => {
      const next = new Set(prev);
      if (next.has(t)) {
        // Keep at least one type active
        if (next.size > 1) next.delete(t);
      } else {
        next.add(t);
      }
      return next;
    });
  }

  if (entries.length === 0) {
    return (
      <div className="flex items-center justify-center h-40 text-sm text-gray-500 italic">
        No activity log entries yet.
      </div>
    );
  }

  return (
    <div className="space-y-4">
      {/* Filter controls */}
      <div className="flex flex-wrap items-center gap-3">
        {/* Scope dropdown */}
        <div className="flex items-center gap-2">
          <span className="text-xs text-gray-500 uppercase tracking-wide">Scope</span>
          <select
            value={scopeFilter}
            onChange={(e) => setScopeFilter(e.target.value)}
            className="bg-gray-800 text-gray-300 text-xs rounded px-2 py-1 border border-gray-700 outline-none focus:border-cyan-500"
          >
            <option value="all">All</option>
            {uniqueScopes.map((s) => (
              <option key={s} value={s}>
                {s}
              </option>
            ))}
          </select>
        </div>

        {/* Type filter chips */}
        <div className="flex items-center gap-1">
          <span className="text-xs text-gray-500 uppercase tracking-wide mr-1">Type</span>
          {TYPE_LABELS.map((t) => (
            <button
              key={t}
              onClick={() => toggleType(t)}
              className={`text-xs px-2 py-0.5 rounded-full border transition-colors ${
                activeTypes.has(t)
                  ? "bg-cyan-900/50 border-cyan-700 text-cyan-300"
                  : "bg-gray-800 border-gray-700 text-gray-500 hover:text-gray-400"
              }`}
            >
              {TYPE_ICONS[t]} {t}
            </button>
          ))}
        </div>

        <span className="text-xs text-gray-600 ml-auto flex items-center gap-2">
          <span className="text-cyan-600">LLM-generated log</span>
          <span>{filtered.length} of {entries.length}</span>
        </span>
      </div>

      {/* Entries */}
      {filtered.length === 0 ? (
        <div className="text-sm text-gray-500 italic py-4">
          No entries match the current filters.
        </div>
      ) : (
        <div className="space-y-0 border-l-2 border-gray-800 pl-4">
          {filtered.map((entry, i) => (
            <div
              key={i}
              className="relative py-3 border-b border-gray-800/50 last:border-0"
            >
              {/* Timeline dot */}
              <div className="absolute -left-[17px] top-4 w-2 h-2 rounded-full bg-gray-600" />

              <div className="flex flex-wrap items-start gap-2">
                {/* Timestamp */}
                <span
                  title={timelineTooltip(entry.timestamp)}
                  className="text-xs text-gray-600 shrink-0 mt-0.5 cursor-default"
                >
                  {relativeTime(entry.timestamp)}
                </span>

                {/* Scope badge */}
                <span
                  className={`text-xs px-2 py-0.5 rounded-full font-medium ${scopeColor(
                    entry.scope
                  )}`}
                >
                  {entry.scope}
                </span>

                {/* Type icon + label */}
                <span className="text-xs text-gray-500 shrink-0 mt-0.5">
                  {TYPE_ICONS[entry.type]}
                </span>

                {/* Author */}
                <span
                  className={`text-xs shrink-0 mt-0.5 font-medium ${
                    entry.author === "agent"
                      ? "text-cyan-500"
                      : "text-amber-400"
                  }`}
                >
                  {entry.author === "agent" ? "Claude Code" : "Researcher"}
                </span>
              </div>

              {/* Message */}
              <p className="text-sm text-gray-300 mt-1">{entry.message}</p>

              {/* Artifact chips (index in the key keeps duplicate names unique) */}
              {entry.artifacts && entry.artifacts.length > 0 && (
                <div className="flex flex-wrap gap-1 mt-2">
                  {entry.artifacts.map((ds, j) => (
                    <button
                      key={`${ds}-${j}`}
                      onClick={() => onArtifactClick(ds)}
                      className="text-xs px-2 py-0.5 rounded-full bg-cyan-900/40 border border-cyan-800/60 text-cyan-400 hover:bg-cyan-900/70 hover:text-cyan-300 transition-colors"
                    >
                      {ds}
                    </button>
                  ))}
                </div>
              )}
            </div>
          ))}
        </div>
      )}
    </div>
  );
}
frontend/src/experiments/components/viewers/HeatmapViewer.tsx ADDED
@@ -0,0 +1,2158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback, useMemo } from "react";
2
+ import { HF_ORG } from "../../../config";
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Types
6
+ // ---------------------------------------------------------------------------
7
+
8
/** Props accepted by the heatmap viewer. */
interface HeatmapViewerProps {
  /** Identifier of the dataset repository to visualize. */
  datasetRepo: string;
  /** Optional dataset split. */
  split?: string;
  /** Callback taking no arguments; presumably fired when the viewer is dismissed. */
  onClose: () => void;
}
13
+
14
/** One row of the evaluation summary: a single model/task/variant result. */
interface SummaryRow {
  // --- Model identity ---
  model: string;
  hf_model_name: string;
  local_model_name: string;

  // --- What was evaluated ---
  task: string;
  split: string;

  // --- Evaluation-time TC flags (checked in the order self, neg, gpt2) ---
  self_tc: boolean;
  neg_tc: boolean;
  gpt2_tc: boolean;

  // --- Training provenance ---
  /** False for base models; base rows are labelled "Base". */
  finetuned: boolean;
  /** Training configuration string; parsed by substring/regex helpers. */
  training_config: string;
  eval_variant: string;

  // --- Metrics (null when not available) ---
  gen_roc: number | null;
  val_roc: number | null;
  val_acc: number | null;
  corr: number | null;
  corr_pos: number | null;
  corr_neg: number | null;

  // --- Bookkeeping ---
  n_samples: number;
  filename: string;
}
35
+
36
/** Names of the numeric metric columns of a summary row. */
type Metric =
  | "gen_roc"
  | "val_roc"
  | "val_acc"
  | "corr"
  | "corr_pos"
  | "corr_neg";

/** Evaluation scoring variants. */
type EvalVariant = "raw" | "tc" | "lenorm" | "tc+lenorm";

/** Kind of TC applied, or "none". */
type TCType = "none" | "self" | "neg" | "gpt2";

/** Chart type used by the viewer. */
type ViewMode = "heatmap" | "bar";

/** Domain filter options. */
type DomainFilter = "all" | "ood" | "in-domain";

/** Keys of the predefined model-comparison presets. */
type ComparisonPreset =
  | "all"
  | "training-effect"
  | "plus-tcself"
  | "tcself-vs-tcneg"
  | "tcself-vs-tcgpt2";
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Constants
45
+ // ---------------------------------------------------------------------------
46
+
47
/** Base URL of the Hugging Face datasets server. */
const HF_DATASETS_API = "https://datasets-server.huggingface.co";

/** Metric keys paired with their display labels. */
const METRICS: { key: Metric; label: string }[] = [
  { key: "gen_roc", label: "Gen ROC" },
  { key: "val_roc", label: "Val ROC" },
  { key: "val_acc", label: "Val Acc" },
  { key: "corr", label: "Correlation" },
  { key: "corr_pos", label: "Corr (pos)" },
  { key: "corr_neg", label: "Corr (neg)" },
];

/** All evaluation variants. */
const EVAL_VARIANTS: EvalVariant[] = [
  "raw",
  "tc",
  "lenorm",
  "tc+lenorm",
];

/** TC types paired with display labels ("none" is intentionally not listed). */
const TC_TYPES: { key: TCType; label: string }[] = [
  { key: "self", label: "Self TC" },
  { key: "neg", label: "Neg TC" },
  { key: "gpt2", label: "GPT-2 TC" },
];
62
/** Plotly default color cycle for bar charts. */
const BAR_COLORS = [
  "#636EFA",
  "#EF553B",
  "#00CC96",
  "#AB63FA",
  "#FFA15A",
  "#19D3F3",
  "#FF6692",
  "#B6E880",
  "#FF97FF",
  "#FECB52",
];

/**
 * Known task families. `prefix` is the string a task name (or a training
 * config) is matched against to assign it to the family identified by `key`.
 */
const TASK_FAMILIES = [
  { key: "hypernym", label: "Hypernym", prefix: "hypernym-" },
  { key: "ifeval", label: "IFEval", prefix: "ifeval-" },
  { key: "plausibleqa", label: "PlausibleQA", prefix: "plausibleqa-" },
  { key: "ambigqa", label: "AmbigQA", prefix: "ambigqa-" },
];
74
+
75
/**
 * Decides whether a row's model may be shown.
 *
 * Base (non-finetuned) rows always pass. Finetuned rows pass only when their
 * training config contains "force-same-x" and, for the families that have
 * single-task variants, names the combined training dataset:
 *  - hypernym configs must contain "hypernym-concat-bananas-to-dogs-double";
 *  - ifeval configs must contain "ifeval-concat".
 */
function isValidFinetunedModel(row: SummaryRow): boolean {
  if (!row.finetuned) return true;

  const config = row.training_config;
  const usesForceSameX = config.includes("force-same-x");
  const singleTaskHypernym =
    config.includes("hypernym-") &&
    !config.includes("hypernym-concat-bananas-to-dogs-double");
  const singleTaskIfeval =
    config.includes("ifeval-") && !config.includes("ifeval-concat");

  return usesForceSameX && !singleTaskHypernym && !singleTaskIfeval;
}
87
+
88
/** Comparison presets paired with their display labels, in listing order. */
const COMPARISON_PRESETS: { key: ComparisonPreset; label: string }[] = [
  {
    key: "all",
    label: "All models",
  },
  {
    key: "training-effect",
    label: "Training Effect",
  },
  {
    key: "plus-tcself",
    label: "+ TC-Self",
  },
  {
    key: "tcself-vs-tcneg",
    label: "TC-Self vs TC-Neg",
  },
  {
    key: "tcself-vs-tcgpt2",
    label: "TC-Self vs TC-GPT2",
  },
];
95
+
96
// Preset row sets.

/** Row labels of the "Training Effect" preset: Base + lo/semi × {Pref, SFT, Comb-v}. */
const TRAINING_EFFECT_ROWS = new Set([
  "Base",
  "Pref-lo",
  "SFT-lo",
  "Comb-v-lo",
  "Pref-semi",
  "SFT-semi",
  "Comb-v-semi",
]);

/** Row labels hidden by default. */
const DEFAULT_HIDDEN_ROWS = new Set([
  "Pref-v-lo",
  "Pref-tcself-v-lo",
  "Pref-tcself-norm-lo",
  "Pref-tcself-norm-v-lo",
  "Comb-tcself-norm-v-lo",
  "SFT-tcself-norm-semi",
  "Comb-tcself-norm-v-semi",
  // PlausibleQA models without lo/semi — not comparable
  "Comb-tcself",
  "Comb-tcself-v",
  "SFT-tcself-v",
]);
115
+
116
/** Row-label segments that denote a TC flag. */
const TC_FLAGS = ["tco", "tcself", "tcneg"];

/**
 * Removes every TC-flag segment from a dash-separated row label, yielding the
 * "base" label used to pair rows (e.g. "Pref-tcself-lo" becomes "Pref-lo").
 */
function stripTCFlag(label: string): string {
  const kept: string[] = [];
  for (const segment of label.split("-")) {
    if (TC_FLAGS.indexOf(segment) === -1) {
      kept.push(segment);
    }
  }
  return kept.join("-");
}
122
/** True when `flag` appears as a whole dash-separated segment of `label`. */
function hasTCFlag(label: string, flag: string): boolean {
  const segments = label.split("-");
  return segments.some((segment) => segment === flag);
}
125
+
126
/**
 * Highest IFEval prompt number that counts as OOD: prompts 1-21 are
 * test-only and never trained on.
 */
const IFEVAL_OOD_MAX = 21;
128
+
129
// Hypernym: fixed in-domain / OOD split
const HYPERNYM_IN_DOMAIN = new Set([
  "bananas", "bazookas", "cabinets", "cars", "chairs", "crows", "diapers", "dogs",
]);
const HYPERNYM_OOD = new Set([
  "ducklings", "elephants", "guns", "hammers", "helmets", "jackets", "kayaks", "kites", "mirrors",
]);
const HYPERNYM_VALID = new Set([...HYPERNYM_IN_DOMAIN, ...HYPERNYM_OOD]);

/**
 * Tells evaluation tasks apart from training-only task names (concat
 * datasets, bare family names, ...).
 *
 * Rules, by family prefix:
 *  - `hypernym-<x>`: valid only when `<x>` is one of the known hypernym subtasks;
 *  - `ifeval-...`: valid only in the form `ifeval-prompt_<n>` / `ifeval-prompt-<n>`;
 *  - `plausibleqa-<x>` / `ambigqa-<x>`: valid only when `<x>` is non-empty;
 *  - a bare family name (no subtask suffix) is a training task, hence invalid;
 *  - anything else is accepted.
 *
 * @param task - Task name from a summary row.
 * @returns True when the task should be treated as an evaluation task.
 */
function isValidEvalTask(task: string): boolean {
  const hypernymPrefix = "hypernym-";
  if (task.startsWith(hypernymPrefix)) {
    return HYPERNYM_VALID.has(task.slice(hypernymPrefix.length));
  }

  if (task.startsWith("ifeval-")) {
    return /^ifeval-prompt[_-]\d+$/.test(task);
  }

  // The subtask must actually be present: a dangling "plausibleqa-" or
  // "ambigqa-" carries no subtask and is rejected.
  for (const prefix of ["plausibleqa-", "ambigqa-"]) {
    if (task.startsWith(prefix)) {
      return task.length > prefix.length;
    }
  }

  // Bare family names without subtask suffix are training tasks
  const bareFamilies = ["ambigqa", "plausibleqa", "hypernym", "ifeval"];
  return !bareFamilies.includes(task);
}
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // Color scale: RdYlGn for [0, 1] mapped to percentages
162
+ // ---------------------------------------------------------------------------
163
+
164
// Plotly RdYlGn colorscale stops (matches Plotly's built-in)
const RDYLGN_STOPS: [number, [number, number, number]][] = [
  [0.0, [165, 0, 38]],
  [0.1, [215, 48, 39]],
  [0.2, [244, 109, 67]],
  [0.3, [253, 174, 97]],
  [0.4, [254, 224, 139]],
  [0.5, [255, 255, 191]],
  [0.6, [217, 239, 139]],
  [0.7, [166, 217, 106]],
  [0.8, [102, 189, 99]],
  [0.9, [26, 152, 80]],
  [1.0, [0, 104, 55]],
];

/**
 * Maps a position on the red-yellow-green scale to a CSS color.
 *
 * @param t - Position on the scale; values outside [0, 1] are clamped.
 * @returns An `rgb(r, g, b)` string linearly interpolated between the two
 *   neighbouring stops. NaN yields the light-gray "no data" color `#f3f4f6`
 *   rather than falling through to the top-of-scale green.
 */
function interpolateRdYlGn(t: number): string {
  // NaN survives Math.min/Math.max and matches no segment below, so handle it
  // up front instead of painting missing data as the best possible score.
  if (Number.isNaN(t)) return "#f3f4f6";

  const clamped = Math.max(0, Math.min(1, t));

  // Find the two stops to interpolate between
  for (let i = 0; i < RDYLGN_STOPS.length - 1; i++) {
    const [t0, c0] = RDYLGN_STOPS[i];
    const [t1, c1] = RDYLGN_STOPS[i + 1];
    if (clamped >= t0 && clamped <= t1) {
      const f = (clamped - t0) / (t1 - t0);
      const r = Math.round(c0[0] + f * (c1[0] - c0[0]));
      const g = Math.round(c0[1] + f * (c1[1] - c0[1]));
      const b = Math.round(c0[2] + f * (c1[2] - c0[2]));
      return `rgb(${r}, ${g}, ${b})`;
    }
  }

  // Defensive fallback: the segments cover [0, 1], so this is not expected.
  return `rgb(0, 104, 55)`;
}
195
+
196
/**
 * Background color for a value on the [0, 1] scale; NaN (no data) maps to
 * light gray.
 */
function rdYlGn(value: number): string {
  // light gray for no data
  return isNaN(value) ? "#f3f4f6" : interpolateRdYlGn(value);
}
200
+
201
/**
 * Background color for a correlation: the value is shifted from [-1, 1] onto
 * the [0, 1] color scale. NaN (no data) maps to light gray.
 */
function corrColor(value: number): string {
  if (isNaN(value)) {
    return "#f3f4f6";
  }
  const onUnitScale = (value + 1) / 2;
  return rdYlGn(onUnitScale);
}
205
+
206
/**
 * Chooses a readable text color for a heatmap cell.
 *
 * @param bgValue - The cell's value; NaN means no data.
 * @param isCorr - When true, `bgValue` is a correlation in [-1, 1] and is
 *   shifted onto the [0, 1] scale before the decision.
 * @returns Gray for no data, dark text on the light middle of the scale
 *   (strictly between 0.3 and 0.7), white text otherwise.
 */
function textColor(bgValue: number, isCorr: boolean): string {
  if (isNaN(bgValue)) {
    return "#9ca3af";
  }

  let position = bgValue;
  if (isCorr) {
    position = (bgValue + 1) / 2;
  }

  // Dark text on the light middle, white text on dark extremes
  const onLightBand = position > 0.3 && position < 0.7;
  return onLightBand ? "#1f2937" : "#ffffff";
}
212
+
213
+ // ---------------------------------------------------------------------------
214
+ // Helpers
215
+ // ---------------------------------------------------------------------------
216
+
217
/**
 * Returns the TC type used at evaluation time, taken from the row's boolean
 * flags. The first set flag wins, in the order self, neg, gpt2; "none" when
 * no flag is set.
 */
function getEvalTCType(row: SummaryRow): TCType {
  const candidates: [boolean, TCType][] = [
    [row.self_tc, "self"],
    [row.neg_tc, "neg"],
    [row.gpt2_tc, "gpt2"],
  ];
  const hit = candidates.find(([enabled]) => enabled);
  return hit ? hit[1] : "none";
}
223
+
224
/**
 * Derives the TC type a model was trained with from markers in its training
 * config. "tc-online"/"tc_online" maps to "gpt2", "tc-self"/"tc_self" to
 * "self", "tc-neg"/"tc_neg" to "neg"; checked in that order, "none" otherwise.
 */
function getTrainingTCType(row: SummaryRow): TCType {
  const config = row.training_config;
  const mentions = (...markers: string[]): boolean =>
    markers.some((marker) => config.includes(marker));

  if (mentions("tc-online", "tc_online")) return "gpt2";
  if (mentions("tc-self", "tc_self")) return "self";
  if (mentions("tc-neg", "tc_neg")) return "neg";
  return "none";
}
231
+
232
/**
 * For finetuned models, checks that the evaluation TC matches the training TC.
 *
 * Base models, and finetuned models trained without TC, accept any
 * evaluation TC.
 */
function isMatchedTC(row: SummaryRow): boolean {
  // base models: any eval TC is valid
  if (!row.finetuned) {
    return true;
  }
  const trainedWith = getTrainingTCType(row);
  // model not trained with TC: any eval TC is valid
  return trainedWith === "none" || getEvalTCType(row) === trainedWith;
}
240
+
241
/**
 * Returns the key of the first task family whose prefix the task name starts
 * with, or null when none matches.
 */
function getTaskFamily(task: string): string | null {
  const family = TASK_FAMILIES.find(({ prefix }) => task.startsWith(prefix));
  return family ? family.key : null;
}
247
+
248
/**
 * Extracts the prompt number from an IFEval task name such as
 * `ifeval-prompt_42` or `ifeval-prompt-42`.
 *
 * @returns The number parsed in base 10, or null when the pattern is absent.
 */
function parseIfevalPromptNum(task: string): number | null {
  const found = /ifeval-prompt[_-](\d+)/.exec(task);
  if (found === null) {
    return null;
  }
  return parseInt(found[1], 10);
}
253
+
254
/**
 * Classifies a task as out-of-domain for the given task family.
 *
 * @param task - Evaluation task name.
 * @param trainingConfig - Not consulted by the current rules.
 * @param family - Task family key.
 * @returns true = OOD, false = in-domain, null = unknown / not applicable.
 */
function isOodTask(task: string, trainingConfig: string, family: string): boolean | null {
  switch (family) {
    case "plausibleqa":
    case "ambigqa":
      // always OOD
      return true;

    case "ifeval": {
      const promptNum = parseIfevalPromptNum(task);
      return promptNum === null ? null : promptNum <= IFEVAL_OOD_MAX;
    }

    case "hypernym": {
      const subtask = task.replace("hypernym-", "");
      if (HYPERNYM_OOD.has(subtask)) return true;
      if (HYPERNYM_IN_DOMAIN.has(subtask)) return false;
      // unknown subtask
      return null;
    }

    default:
      return null;
  }
}
270
+
271
/**
 * Returns the key of the first task family whose prefix occurs anywhere in
 * the training config, or null when none does.
 */
function getTrainingFamily(config: string): string | null {
  const family = TASK_FAMILIES.find(({ prefix }) => config.includes(prefix));
  return family ? family.key : null;
}
277
+
278
/**
 * Matches `pattern` against `text` and parses the first capture group as a
 * float.
 *
 * @returns The parsed number, or null when the pattern does not match.
 */
function extractFloat(pattern: RegExp, text: string): number | null {
  const found = text.match(pattern);
  if (!found) {
    return null;
  }
  return parseFloat(found[1]);
}
282
+
283
/**
 * Classifies a training config as "SFT", "Comb" or "Pref" from its
 * pref/nllv/nllg weights.
 *
 * Weights missing from the config default to pref=1.0, nllv=0.0, nllg=0.0.
 *  - "SFT":  pref is 0 and both nll weights are 1;
 *  - "Comb": pref and both nll weights are 1;
 *  - "Pref": everything else (including the pure-preference case).
 */
function parseTrainingMode(config: string): string {
  const weight = (pattern: RegExp, fallback: number): number =>
    extractFloat(pattern, config) ?? fallback;

  const pref = weight(/(?:^|[_-])pref(\d+(?:\.\d+)?)/, 1.0);
  const nllv = weight(/nllv(\d+(?:\.\d+)?)/, 0.0);
  const nllg = weight(/nllg(\d+(?:\.\d+)?)/, 0.0);

  const bothNllOn = nllv === 1.0 && nllg === 1.0;
  if (bothNllOn && pref === 0.0) return "SFT";
  if (bothNllOn && pref === 1.0) return "Comb";

  // Pure preference training and any other weight mix are both labelled "Pref".
  return "Pref";
}
299
+
300
/**
 * Builds the compact, dash-separated row label for a fine-tuned model.
 *
 * Layout: `<mode>[-tco][-tcself][-tcneg][-norm][-v][-lo][-semi]`, where `<mode>`
 * comes from `parseTrainingMode` and each flag is appended when its marker
 * substring occurs in `config`.
 *
 * @param config - Training config string.
 * @returns The row label. A console warning is emitted when more than one TC
 *   marker is present, since those are expected to be mutually exclusive.
 */
function buildRowLabel(config: string): string {
  const hasAny = (...needles: string[]): boolean => needles.some((needle) => config.includes(needle));

  // TC flags, in label order.
  const tcFlags = [
    { flag: "tco", present: hasAny("_tc-online_", "-tc-online-") },
    { flag: "tcself", present: hasAny("_tc-self_", "-tc-self-") },
    { flag: "tcneg", present: hasAny("_tc-neg_", "-tc-neg-") },
  ]
    .filter((entry) => entry.present)
    .map((entry) => entry.flag);

  if (tcFlags.length > 1) {
    console.warn(`Multiple TC flags in training config (expected at most 1): ${config}`);
  }

  const otherFlags: string[] = [];
  // Optional independent flags.
  if (hasAny("_lenorm_", "-lenorm-")) otherFlags.push("norm");
  if (hasAny("_vallogodds", "-vallogodds")) otherFlags.push("v");
  // Data regime flags (matched anywhere in the config).
  if (hasAny("labelonly")) otherFlags.push("lo");
  if (hasAny("semi")) otherFlags.push("semi");

  return [parseTrainingMode(config), ...tcFlags, ...otherFlags].join("-");
}
325
+
326
/**
 * Row label for a summary row: `"Base"` for rows that are not fine-tuned,
 * otherwise the label built from the row's training config.
 */
function getRowLabel(row: SummaryRow): string {
  return row.finetuned ? buildRowLabel(row.training_config) : "Base";
}
330
+
331
+ // Display-friendly row label: reorder to [regime, tc, mode, extras], spaces, Pref→RankAlign
332
/**
 * Converts an internal dash-separated row label into its display form.
 *
 * Tokens are reordered to `[regime, tc, mode, extras]` and joined with spaces;
 * the `Pref` mode is shown as `RankAlign`. `"Base"` is returned unchanged.
 *
 * @param label - Internal row label such as `Pref-tco-norm-lo`.
 * @returns Display label such as `lo tco RankAlign norm`.
 */
function displayRowLabel(label: string): string {
  if (label === "Base") return "Base";

  const [head, ...flags] = label.split("-");
  const mode = head === "Pref" ? "RankAlign" : head;

  // Bucket each flag in one pass, preserving relative order within a bucket.
  const regime: string[] = [];
  const tc: string[] = [];
  const extras: string[] = [];
  for (const flag of flags) {
    if (flag === "lo" || flag === "semi") regime.push(flag);
    else if (flag === "tcself" || flag === "tcneg" || flag === "tco") tc.push(flag);
    else extras.push(flag);
  }

  return regime.concat(tc, [mode], extras).join(" ");
}
346
+
347
+ // JSX version with colored TC flags for use in HTML contexts
348
/**
 * HTML rendering of a row label with the TC flag tokens colored.
 *
 * Token order and naming come from `displayRowLabel`, so the text shown here
 * always matches the plain-string form. Assumes label tokens contain no spaces,
 * which holds for labels produced by `buildRowLabel`.
 *
 * @param label - Internal dash-separated row label (or `"Base"`).
 */
function DisplayRowLabel({ label }: { label: string }) {
  if (label === "Base") return <>Base</>;

  const tokens = displayRowLabel(label).split(" ");
  const tcColor: Record<string, string> = { tcself: "#f87171", tcneg: "#fb923c", tco: "#a78bfa" };

  return (
    <>
      {tokens.map((t, i) => (
        <span key={i}>
          {i > 0 && " "}
          {tcColor[t] ? <span style={{ color: tcColor[t] }}>{t}</span> : t}
        </span>
      ))}
    </>
  );
}
371
+
372
+ // SVG version with colored TC flags (uses tspan)
373
/**
 * SVG rendering of a row label (as `<tspan>` children) with colored TC flags.
 *
 * The text comes from `displayRowLabel` and is truncated to `maxLen` characters,
 * ending in `..` when shortened. Coloring is applied per space-separated token of
 * the truncated text, so a TC flag that is cut by truncation is left uncolored.
 *
 * @param label - Internal dash-separated row label (or `"Base"`).
 * @param maxLen - Maximum number of characters to show (default 22).
 */
function SvgRowLabel({ label, maxLen = 22 }: { label: string; maxLen?: number }) {
  if (label === "Base") return <>Base</>;

  const full = displayRowLabel(label);
  // Clamp so a tiny maxLen cannot turn into a negative slice end, which would
  // cut from the end of the string instead of the start.
  const keep = Math.max(0, maxLen - 2);
  const display = full.length > maxLen ? full.slice(0, keep) + ".." : full;

  const tcColor: Record<string, string> = { tcself: "#f87171", tcneg: "#fb923c", tco: "#a78bfa" };

  return (
    <>
      {display.split(" ").map((t, i) => (
        <tspan key={i} fill={tcColor[t] || undefined}>
          {i > 0 && " "}{t}
        </tspan>
      ))}
    </>
  );
}
399
+
400
/**
 * Arithmetic mean of `values`.
 *
 * @returns The mean, or `NaN` for an empty array.
 */
function mean(values: number[]): number {
  const count = values.length;
  if (count === 0) return NaN;

  let total = 0;
  for (const v of values) total += v;
  return total / count;
}
404
+
405
/**
 * Standard error of the mean, using the sample (n − 1) variance.
 *
 * @returns `sqrt(variance) / sqrt(n)`, or `0` when fewer than two values are given.
 */
function stdErr(values: number[]): number {
  const n = values.length;
  if (n < 2) return 0;

  let total = 0;
  for (const v of values) total += v;
  const center = total / n;

  let squaredDeviations = 0;
  for (const v of values) squaredDeviations += (v - center) ** 2;

  const variance = squaredDeviations / (n - 1);
  return Math.sqrt(variance) / Math.sqrt(n);
}
411
+
412
+ // ---------------------------------------------------------------------------
413
+ // Data fetching — downloads the parquet file directly (1 request, ~1MB)
414
+ // ---------------------------------------------------------------------------
415
+
416
/**
 * React hook that loads every row of a Hugging Face dataset split from its
 * parquet export.
 *
 * Steps: ask the datasets server for the dataset's parquet files, pick the file
 * for `_split` (falling back to the first listed file when no split matches),
 * then read it with `hyparquet`.
 *
 * Only the most recent request may update state: a response that arrives after
 * `repo`/`_split` changed, after `refetch` was called again, or after unmount is
 * discarded, so slow earlier requests cannot overwrite newer results.
 *
 * @param repo - Dataset repository id.
 * @param _split - Preferred split name.
 * @returns `rows`, `loading`, `error` (message or `null`), `progress`, and `refetch`.
 *   NOTE(review): `progress.total` is the file size reported by the server while
 *   loading, and the row count once loading finishes — confirm consumers expect that.
 */
function useDatasetRows(repo: string, _split: string) {
  const [rows, setRows] = useState<SummaryRow[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [progress, setProgress] = useState({ loaded: 0, total: 0 });
  // Mutable counter that survives re-renders (a ref-like box held in state so this
  // hook needs no extra imports). Its identity never changes.
  const [requestCounter] = useState(() => ({ latest: 0 }));

  const fetchAll = useCallback(async () => {
    const requestId = ++requestCounter.latest;
    const isStale = () => requestCounter.latest !== requestId;

    setLoading(true);
    setError(null);
    setRows([]);
    try {
      // Step 1: get the parquet file URL from HF datasets server
      const metaUrl = `${HF_DATASETS_API}/parquet?dataset=${encodeURIComponent(repo)}`;
      const metaResp = await fetch(metaUrl);
      if (!metaResp.ok) {
        throw new Error(`Failed to get parquet info: ${metaResp.status}`);
      }
      const metaData = await metaResp.json();
      if (isStale()) return;

      const parquetFiles = metaData.parquet_files ?? [];
      const matchingFile = parquetFiles.find((f: { split: string }) => f.split === _split) ?? parquetFiles[0];
      if (!matchingFile) {
        throw new Error("No parquet files found for this dataset");
      }

      setProgress({ loaded: 0, total: matchingFile.size ?? 0 });

      // Step 2: download the parquet file directly from HF (static file, no rate limits)
      const parquetUrl: string = matchingFile.url;
      const { asyncBufferFromUrl, parquetRead } = await import("hyparquet");

      const file = await asyncBufferFromUrl({ url: parquetUrl });

      // Step 3: parse all rows
      const allRows: SummaryRow[] = [];
      await parquetRead({
        file,
        rowFormat: "object",
        onComplete: (data: Record<string, unknown>[]) => {
          for (const row of data) {
            allRows.push(row as unknown as SummaryRow);
          }
        },
      });
      if (isStale()) return;

      setProgress({ loaded: allRows.length, total: allRows.length });
      setRows(allRows);
    } catch (err) {
      if (!isStale()) {
        setError(err instanceof Error ? err.message : String(err));
      }
    } finally {
      // A superseded request must not clear the spinner of the request that replaced it.
      if (!isStale()) {
        setLoading(false);
      }
    }
  }, [repo, _split, requestCounter]);

  useEffect(() => {
    void fetchAll();
    // Invalidate the in-flight request when inputs change or the component unmounts.
    return () => {
      requestCounter.latest += 1;
    };
  }, [fetchAll, requestCounter]);

  return { rows, loading, error, progress, refetch: fetchAll };
}
475
+
476
+ // ---------------------------------------------------------------------------
477
+ // Aggregation
478
+ // ---------------------------------------------------------------------------
479
+
480
/** Aggregated statistics for one (row label, eval variant) cell. */
interface AggCell {
  /** Mean of the metric values that fell into this cell. */
  mean: number;
  /** Standard error of that mean (0 when the cell holds fewer than two values). */
  se: number;
  /** Number of values aggregated into this cell. */
  n: number;
}
485
+
486
/**
 * Guards against silently averaging results from different models.
 *
 * Two checks run in order:
 *  1. every row must share the same base model (`row.model`);
 *  2. every row label produced by `groupByRow` must correspond to exactly one
 *     training config (rows that are not fine-tuned count as a single
 *     `__base__` config).
 *
 * @param rows - Rows about to be aggregated.
 * @param groupByRow - Maps a row to its row label.
 * @returns `null` when the data is clean, otherwise a multi-line error message
 *   describing the first failing check.
 */
function validateRowLabels(rows: SummaryRow[], groupByRow: (r: SummaryRow) => string): string | null {
  // Check 1: a single base model across the whole view.
  const baseModels = new Set(rows.map((r) => r.model));
  if (baseModels.size > 1) {
    return [
      "DATA INTEGRITY ERROR: Multiple base models in the same view!",
      `Found ${baseModels.size} different models being mixed together:`,
      ...Array.from(baseModels).map((m) => `  • ${m}`),
      "",
      "This means results from different models are being averaged. Filter by a single model before displaying.",
    ].join("\n");
  }

  // Check 2: collect the distinct configs behind each label.
  const configsByLabel = new Map<string, Set<string>>();
  for (const row of rows) {
    const label = groupByRow(row);
    const config = row.finetuned ? row.training_config : "__base__";
    let seen = configsByLabel.get(label);
    if (seen === undefined) {
      seen = new Set();
      configsByLabel.set(label, seen);
    }
    seen.add(config);
  }

  const collisions: string[] = [];
  configsByLabel.forEach((configs, label) => {
    if (configs.size > 1) {
      collisions.push(`"${label}" → ${configs.size} configs: ${Array.from(configs).join(", ")}`);
    }
  });
  if (collisions.length === 0) return null;

  return (
    "DATA INTEGRITY ERROR: Row label collisions detected!\n" +
    "The following labels map to multiple different model configs " +
    "(their results are being silently averaged):\n\n" +
    collisions.join("\n")
  );
}
534
+
535
/** Output of `aggregateData`. */
interface AggResult {
  /** Aggregated cells, keyed by row label and then by eval variant. */
  data: Map<string, Map<EvalVariant, AggCell>>;
  /** Message from row-label validation, or `null` when validation passed. */
  validationError: string | null;
}
539
+
540
/**
 * Aggregates one metric into a row-label × eval-variant grid.
 *
 * Rows whose `eval_variant` is not listed in `EVAL_VARIANTS`, or whose metric
 * value is null/undefined/NaN, are ignored. Validation runs on all rows and its
 * result is reported alongside the data; aggregation proceeds even when
 * validation fails.
 *
 * @param rows - Summary rows to aggregate.
 * @param metric - Metric column to read from each row.
 * @param groupByRow - Maps a row to its row label.
 * @returns Mean / standard error / count per cell, plus the validation outcome.
 */
function aggregateData(
  rows: SummaryRow[],
  metric: Metric,
  groupByRow: (r: SummaryRow) => string,
): AggResult {
  const validationError = validateRowLabels(rows, groupByRow);

  // Collect raw samples: rowLabel → evalVariant → values[]
  const buckets = new Map<string, Map<EvalVariant, number[]>>();
  for (const row of rows) {
    const rowLabel = groupByRow(row);
    const variant = row.eval_variant as EvalVariant;
    if (!EVAL_VARIANTS.includes(variant)) continue;

    const raw = row[metric];
    if (raw === null || raw === undefined || isNaN(raw as number)) continue;

    let perVariant = buckets.get(rowLabel);
    if (!perVariant) {
      perVariant = new Map();
      buckets.set(rowLabel, perVariant);
    }
    const samples = perVariant.get(variant);
    if (samples) samples.push(raw as number);
    else perVariant.set(variant, [raw as number]);
  }

  // Reduce every bucket to its summary statistics.
  const data = new Map<string, Map<EvalVariant, AggCell>>();
  buckets.forEach((perVariant, rowLabel) => {
    const cells = new Map<EvalVariant, AggCell>();
    perVariant.forEach((samples, variant) => {
      cells.set(variant, { mean: mean(samples), se: stdErr(samples), n: samples.length });
    });
    data.set(rowLabel, cells);
  });

  return { data, validationError };
}
575
+
576
+ // ---------------------------------------------------------------------------
577
+ // Sort row labels: Base first, then by TC group, data regime, and training mode
578
+ // ---------------------------------------------------------------------------
579
+
580
+ // Sort order: Base first, then by [tc group, data regime, mode]
581
+ // tc group: no-tc (0) < tcself (1) < tcneg (2) < tco (3)
582
+ // data regime: lo (0) < semi (1) < other (2)
583
+ // mode: Pref (0) < SFT (1) < Comb (2) < other (3)
584
/**
 * Computes the sort key for a row label.
 *
 * Key layout: `[baseMarker, tcGroup, regime, modeRank, tiebreak]`
 *  - baseMarker: -1 for `"Base"` (sorts first), 0 otherwise
 *  - tcGroup:    none (0) < tcself (1) < tcneg (2) < tco (3)
 *  - regime:     lo (0) < semi (1) < other (2)
 *  - modeRank:   Pref (0) < SFT (1) < Comb (2) < other (3), from the first token
 *  - tiebreak:   the label with mode tokens removed
 */
function rowSortKey(label: string): [number, number, number, number, string] {
  if (label === "Base") return [-1, 0, 0, 0, ""];

  const parts = label.split("-");
  const has = (flag: string): boolean => parts.includes(flag);

  const tcGroup = has("tcself") ? 1 : has("tcneg") ? 2 : has("tco") ? 3 : 0;
  const regime = has("lo") ? 0 : has("semi") ? 1 : 2;

  const modeOrder: Record<string, number> = { Pref: 0, SFT: 1, Comb: 2 };
  const modeRank = modeOrder[parts[0] ?? ""] ?? 3;

  // Everything except the mode tokens, used as the final tiebreak.
  const remaining = parts
    .filter((p) => p !== "Pref" && p !== "SFT" && p !== "Comb")
    .join("-");

  return [0, tcGroup, regime, modeRank, remaining];
}
608
+
609
/**
 * Returns a sorted copy of `labels`, ordered by `rowSortKey`: the four numeric
 * slots are compared in order, then the string tiebreak via `localeCompare`.
 * The input array is not modified.
 */
function sortRowLabels(labels: string[]): string[] {
  // Compute each key once, sort the decorated entries, then strip the keys.
  const keyed = labels.map((label) => ({ label, key: rowSortKey(label) }));

  keyed.sort((x, y) => {
    for (let slot = 0; slot < 4; slot++) {
      const diff = (x.key[slot] as number) - (y.key[slot] as number);
      if (diff !== 0) return diff;
    }
    return x.key[4].localeCompare(y.key[4]);
  });

  return keyed.map((entry) => entry.label);
}
619
+
620
+ // ---------------------------------------------------------------------------
621
+ // Sub-components
622
+ // ---------------------------------------------------------------------------
623
+
624
/**
 * Labelled single-choice `<select>`.
 *
 * @param label - Caption shown above the control; also used as its accessible name.
 * @param value - Currently selected option key.
 * @param options - Available options (`key` is the value, `label` the visible text).
 * @param onChange - Called with the newly selected key.
 */
function Dropdown<T extends string>({
  label,
  value,
  options,
  onChange,
}: {
  label: string;
  value: T;
  options: { key: T; label: string }[];
  onChange: (v: T) => void;
}) {
  return (
    <div className="flex flex-col gap-1">
      <label className="text-[10px] uppercase tracking-wider text-gray-500 font-medium">{label}</label>
      {/* The visible <label> has no htmlFor/id link, so name the control explicitly
          for assistive technology. */}
      <select
        aria-label={label}
        value={value}
        onChange={(e) => onChange(e.target.value as T)}
        className="bg-gray-800 text-gray-200 text-xs border border-gray-700 rounded px-2 py-1.5 focus:outline-none focus:border-cyan-600"
      >
        {options.map((o) => (
          <option key={o.key} value={o.key}>
            {o.label}
          </option>
        ))}
      </select>
    </div>
  );
}
652
+
653
/**
 * Labelled group of toggle buttons for choosing any subset of `options`.
 *
 * @param label - Caption shown above the buttons.
 * @param selected - Currently selected keys. Never mutated; a new `Set` is
 *   passed to `onChange` on every toggle.
 * @param options - Available options (`key` is the value, `label` the visible text).
 * @param onChange - Called with the updated selection.
 */
function MultiSelect<T extends string>({
  label,
  selected,
  options,
  onChange,
}: {
  label: string;
  selected: Set<T>;
  options: { key: T; label: string }[];
  onChange: (s: Set<T>) => void;
}) {
  const toggle = (key: T) => {
    const next = new Set(selected);
    if (next.has(key)) next.delete(key);
    else next.add(key);
    onChange(next);
  };

  return (
    <div className="flex flex-col gap-1">
      <label className="text-[10px] uppercase tracking-wider text-gray-500 font-medium">{label}</label>
      <div className="flex flex-wrap gap-1">
        {options.map((o) => (
          <button
            key={o.key}
            // Explicit type: a bare <button> defaults to "submit" inside a <form>.
            type="button"
            // Expose the on/off state, which is otherwise conveyed by color only.
            aria-pressed={selected.has(o.key)}
            onClick={() => toggle(o.key)}
            className={`text-xs px-2 py-1 rounded border transition-colors ${
              selected.has(o.key)
                ? "bg-cyan-800/60 text-cyan-200 border-cyan-600/60"
                : "bg-gray-800 text-gray-500 border-gray-700 hover:text-gray-300"
            }`}
          >
            {o.label}
          </button>
        ))}
      </div>
    </div>
  );
}
692
+
693
+ // ---------------------------------------------------------------------------
694
+ // Heatmap component (pure HTML/CSS)
695
+ // ---------------------------------------------------------------------------
696
+
697
/**
 * Heatmap table: one row per model label, one column per eval variant.
 *
 * Cells show the aggregated mean. Metrics whose key starts with `corr` are
 * printed with three decimals and colored with `corrColor`; all other metrics
 * are printed as percentages (one decimal) and colored with `rdYlGn`. A cell's
 * tooltip adds `n` and the standard error when more than one value was aggregated.
 *
 * @param data - Aggregated cells keyed by row label, then eval variant.
 * @param rowLabels - Row labels to render, in display order.
 * @param colLabels - Eval variants to render as columns.
 * @param metric - Metric being shown (drives formatting and color scale).
 * @param title - Heading above the table.
 * @param fullConfigs - Row label → full training config, used as the row tooltip.
 */
function HeatmapGrid({
  data,
  rowLabels,
  colLabels,
  metric,
  title,
  fullConfigs,
}: {
  data: Map<string, Map<EvalVariant, AggCell>>;
  rowLabels: string[];
  colLabels: EvalVariant[];
  metric: Metric;
  title: string;
  fullConfigs: Map<string, string>;
}) {
  const isCorr = metric.startsWith("corr");
  const colorFn = isCorr ? corrColor : rdYlGn;

  const formatVal = (v: number) => {
    if (isNaN(v)) return "-";
    return isCorr ? v.toFixed(3) : (v * 100).toFixed(1);
  };

  // Renders one value cell; `cell` is undefined when there is no data for it.
  const renderCell = (col: EvalVariant, cell: AggCell | undefined) => {
    const val = cell?.mean ?? NaN;
    let tooltip = "no data";
    if (cell) {
      const spread = cell.n > 1 ? ` (n=${cell.n}, se=${formatVal(cell.se)})` : "";
      tooltip = `${formatVal(val)}${spread}`;
    }
    return (
      <td
        key={col}
        className="text-center py-1.5 px-2 font-mono border border-gray-800/50 cursor-default transition-all hover:ring-1 hover:ring-cyan-500/50"
        style={{ backgroundColor: colorFn(val), color: textColor(val, isCorr) }}
        title={tooltip}
      >
        {formatVal(val)}
      </td>
    );
  };

  return (
    <div className="flex flex-col gap-2">
      <h3 className="text-xs font-medium text-gray-300">{title}</h3>
      <div className="overflow-x-auto">
        <table className="border-collapse text-xs">
          <thead>
            <tr>
              <th className="text-left py-1 px-2 text-gray-500 font-normal min-w-[160px] max-w-[240px]">Model</th>
              {colLabels.map((col) => (
                <th key={col} className="text-center py-1 px-3 text-gray-400 font-medium min-w-[72px]">
                  {col}
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {rowLabels.map((rl) => {
              const evMap = data.get(rl);
              return (
                <tr key={rl} className="group">
                  <td
                    className="py-1 px-2 text-gray-300 font-mono truncate max-w-[240px]"
                    title={fullConfigs.get(rl) || rl}
                  >
                    <DisplayRowLabel label={rl} />
                  </td>
                  {colLabels.map((col) => renderCell(col, evMap?.get(col)))}
                </tr>
              );
            })}
          </tbody>
        </table>
      </div>
    </div>
  );
}
770
+
771
+ // ---------------------------------------------------------------------------
772
+ // Bar chart component (SVG)
773
+ // ---------------------------------------------------------------------------
774
+
775
/**
 * Grouped SVG bar chart: one group per row label, one bar per eval variant,
 * with standard-error whiskers and a variant legend.
 *
 * The y-range is data-driven (mean ± se, padded by 0.05) and clamped to
 * [-1, 1] for `corr*` metrics or [0, 1] otherwise. Correlation bars grow from
 * zero when zero is inside the visible range; otherwise they grow from the
 * nearest visible edge so they never extend outside the plot area. Other
 * metrics grow from the bottom of the range.
 *
 * @param data - Aggregated cells keyed by row label, then eval variant.
 * @param rowLabels - Row labels to plot, in display order.
 * @param evalVariants - Eval variants to plot within each group.
 * @param metric - Metric being shown (drives scale and number formatting).
 * @param title - Heading above the chart.
 */
function BarChart({
  data,
  rowLabels,
  evalVariants,
  metric,
  title,
}: {
  data: Map<string, Map<EvalVariant, AggCell>>;
  rowLabels: string[];
  evalVariants: EvalVariant[];
  metric: Metric;
  title: string;
}) {
  const isCorr = metric.startsWith("corr");

  // Layout (SVG user units)
  const subBarWidth = 18;
  const subGap = 2;
  const groupGap = 16;
  const numSub = evalVariants.length;
  const groupWidth = numSub * subBarWidth + (numSub - 1) * subGap;
  const chartHeight = 200;
  const marginTop = 20;
  const marginBottom = 80;
  const marginLeft = 50;
  const legendHeight = 24;
  const svgWidth = marginLeft + rowLabels.length * (groupWidth + groupGap) + 20;
  const svgHeight = chartHeight + marginTop + marginBottom + legendHeight;

  // Collect all values for scale
  const allCells: { mean: number; se: number }[] = [];
  for (const rl of rowLabels) {
    for (const ev of evalVariants) {
      const cell = data.get(rl)?.get(ev);
      if (cell && !isNaN(cell.mean)) allCells.push(cell);
    }
  }
  if (allCells.length === 0) {
    return (
      <div className="flex flex-col gap-2">
        <h3 className="text-xs font-medium text-gray-300">{title}</h3>
        <p className="text-xs text-gray-500 italic">No data</p>
      </div>
    );
  }

  // Data-driven range with padding, clamped to the metric's valid interval.
  const low = Math.min(...allCells.map((c) => c.mean - c.se));
  const high = Math.max(...allCells.map((c) => c.mean + c.se));
  const minVal = Math.max(isCorr ? -1 : 0, low - 0.05);
  const maxVal = Math.min(1, high + 0.05);
  const span = maxVal - minVal;
  const range = span || 1; // avoid dividing by zero on a degenerate range
  const yScale = (v: number) => marginTop + chartHeight * (1 - (v - minVal) / range);

  // Value the bars grow from. For correlations this is zero, clamped into the
  // visible range so bars stay inside the plot when every value has the same sign.
  const baseline = isCorr ? Math.min(maxVal, Math.max(minVal, 0)) : minVal;

  // Tick marks: step through integer multiples of `step` instead of accumulating
  // `t += step`, so floating-point drift cannot drop the final tick.
  const step = isCorr ? (span > 0.5 ? 0.2 : 0.1) : 0.1;
  const firstTick = Math.ceil(minVal / step - 1e-9);
  const lastTick = Math.floor(maxVal / step + 1e-9);
  const ticks: number[] = [];
  for (let k = firstTick; k <= lastTick; k++) {
    ticks.push(Math.round(k * step * 1000) / 1000);
  }

  // Per-variant opacity; tc variants additionally get a hatch overlay below.
  const variantOpacity: Record<string, number> = { raw: 0.85, tc: 0.6, lenorm: 0.7, "tc+lenorm": 0.45 };

  return (
    <div className="flex flex-col gap-2">
      <h3 className="text-xs font-medium text-gray-300">{title}</h3>
      <div className="overflow-x-auto">
        <svg width={svgWidth} height={svgHeight} className="block">
          {/* Hatching patterns for tc variants */}
          <defs>
            <pattern id="hatch-tc" patternUnits="userSpaceOnUse" width="4" height="4" patternTransform="rotate(45)">
              <line x1="0" y1="0" x2="0" y2="4" stroke="rgba(0,0,0,0.3)" strokeWidth="1" />
            </pattern>
            <pattern id="hatch-tclenorm" patternUnits="userSpaceOnUse" width="4" height="4" patternTransform="rotate(-45)">
              <line x1="0" y1="0" x2="0" y2="4" stroke="rgba(0,0,0,0.4)" strokeWidth="1.5" />
            </pattern>
          </defs>
          {/* Grid lines */}
          {ticks.map((t) => (
            <g key={t}>
              <line x1={marginLeft} x2={svgWidth - 10} y1={yScale(t)} y2={yScale(t)} stroke="#374151" strokeWidth={0.5} />
              <text x={marginLeft - 4} y={yScale(t) + 3} textAnchor="end" className="fill-gray-500 text-[10px]">
                {isCorr ? t.toFixed(1) : (t * 100).toFixed(0)}
              </text>
            </g>
          ))}
          {/* Grouped bars */}
          {rowLabels.map((rl, i) => {
            const groupX = marginLeft + i * (groupWidth + groupGap);
            const color = BAR_COLORS[i % BAR_COLORS.length];
            const labelX = groupX + groupWidth / 2;
            return (
              <g key={rl}>
                {evalVariants.map((ev, j) => {
                  const cell = data.get(rl)?.get(ev);
                  const val = cell?.mean ?? NaN;
                  if (isNaN(val)) return null;
                  const x = groupX + j * (subBarWidth + subGap);
                  const barY = yScale(val);
                  const baseY = yScale(baseline);
                  const barH = Math.abs(baseY - barY);
                  const actualY = Math.min(barY, baseY);
                  const opacity = variantOpacity[ev] ?? 0.85;
                  const hasHatch = ev === "tc" || ev === "tc+lenorm";
                  const cx = x + subBarWidth / 2;
                  const se = cell?.se ?? 0;
                  // Whiskers are clipped to the visible range.
                  const seTop = yScale(Math.min(maxVal, val + se));
                  const seBot = yScale(Math.max(minVal, val - se));
                  return (
                    <g key={ev}>
                      <rect x={x} y={actualY} width={subBarWidth} height={barH} fill={color} rx={1} opacity={opacity} />
                      {hasHatch && (
                        <rect x={x} y={actualY} width={subBarWidth} height={barH} fill={ev === "tc" ? "url(#hatch-tc)" : "url(#hatch-tclenorm)"} rx={1} />
                      )}
                      {se > 0 && (
                        <>
                          <line x1={cx} x2={cx} y1={seTop} y2={seBot} stroke="#e5e7eb" strokeWidth={1} />
                          <line x1={cx - 3} x2={cx + 3} y1={seTop} y2={seTop} stroke="#e5e7eb" strokeWidth={1} />
                          <line x1={cx - 3} x2={cx + 3} y1={seBot} y2={seBot} stroke="#e5e7eb" strokeWidth={1} />
                        </>
                      )}
                      <title>{`${displayRowLabel(rl)} [${ev}]: ${isCorr ? val.toFixed(3) : (val * 100).toFixed(1)} (n=${cell?.n ?? 0}, se=${se.toFixed(4)})`}</title>
                    </g>
                  );
                })}
                {/* Model label */}
                <text
                  x={labelX}
                  y={marginTop + chartHeight + 8}
                  textAnchor="end"
                  transform={`rotate(-45, ${labelX}, ${marginTop + chartHeight + 8})`}
                  className="fill-gray-400 text-[9px]"
                >
                  <SvgRowLabel label={rl} maxLen={22} />
                </text>
              </g>
            );
          })}
          {/* Legend */}
          {evalVariants.map((ev, j) => {
            const lx = marginLeft + j * 80;
            const ly = svgHeight - 10;
            const opacity = variantOpacity[ev] ?? 0.85;
            const hasHatch = ev === "tc" || ev === "tc+lenorm";
            return (
              <g key={ev}>
                <rect x={lx} y={ly - 8} width={12} height={8} fill="#636EFA" opacity={opacity} rx={1} />
                {hasHatch && <rect x={lx} y={ly - 8} width={12} height={8} fill={ev === "tc" ? "url(#hatch-tc)" : "url(#hatch-tclenorm)"} rx={1} />}
                <text x={lx + 16} y={ly} className="fill-gray-400 text-[9px]">{ev}</text>
              </g>
            );
          })}
        </svg>
      </div>
    </div>
  );
}
938
+
939
+ // ---------------------------------------------------------------------------
940
+ // Delta comparison view: side-by-side heatmaps + delta
941
+ // ---------------------------------------------------------------------------
942
+
943
+ // Diverging color scale for deltas: red (negative) ↔ white (zero) ↔ blue (positive)
944
+ // Uses Plotly's RdBu stops (reversed so blue = positive)
945
/** Color stops for negative deltas, ordered from most negative (deep red) to zero (white). */
const DELTA_NEG: [number, number, number][] = [
  [103, 0, 31], // -max: deep red
  [178, 24, 43],
  [214, 96, 77],
  [244, 165, 130],
  [253, 219, 199],
  [255, 255, 255], // zero: white
];
/** Color stops for positive deltas, ordered from zero (white) to most positive (deep blue). */
const DELTA_POS: [number, number, number][] = [
  [255, 255, 255], // zero: white
  [209, 229, 240],
  [146, 197, 222],
  [67, 147, 195],
  [33, 102, 172],
  [5, 48, 97], // +max: deep blue
];

/**
 * Background color for a delta cell on a diverging red ↔ white ↔ blue scale.
 *
 * @param delta - Difference to color; negative values map to red, positive to blue.
 * @param maxDelta - Magnitude at which the color saturates (default 0.10, i.e. ±10pp).
 *   Non-positive or non-finite values fall back to the default.
 * @returns A CSS `rgb(r, g, b)` string, or light gray `#f3f4f6` when `delta` is NaN.
 */
function deltaColor(delta: number, maxDelta = 0.10): string {
  if (isNaN(delta)) return "#f3f4f6";
  const saturation = Number.isFinite(maxDelta) && maxDelta > 0 ? maxDelta : 0.10;
  const t = Math.min(Math.abs(delta) / saturation, 1); // 0..1

  // Orient the ramp white → saturated so both signs share the same interpolation.
  const ramp = delta < 0 ? [...DELTA_NEG].reverse() : DELTA_POS;
  const pos = t * (ramp.length - 1);
  const i = Math.min(Math.floor(pos), ramp.length - 2);
  const f = pos - i;
  const c0 = ramp[i];
  const c1 = ramp[i + 1];

  const r = Math.round(c0[0] + f * (c1[0] - c0[0]));
  const g = Math.round(c0[1] + f * (c1[1] - c0[1]));
  const b = Math.round(c0[2] + f * (c1[2] - c0[2]));
  return `rgb(${r}, ${g}, ${b})`;
}
981
+
982
/**
 * Text color that stays readable on the background chosen for the same delta.
 *
 * @param delta - Difference being displayed.
 * @param maxDelta - Magnitude at which the background saturates (default 0.10);
 *   pass the same value used for the background. Non-positive or non-finite
 *   values fall back to the default.
 * @returns White once the delta exceeds 60% of `maxDelta`, dark gray below that,
 *   and muted gray `#9ca3af` when `delta` is NaN.
 */
function deltaTextColor(delta: number, maxDelta = 0.10): string {
  if (isNaN(delta)) return "#9ca3af";
  const saturation = Number.isFinite(maxDelta) && maxDelta > 0 ? maxDelta : 0.10;
  const t = Math.min(Math.abs(delta) / saturation, 1);
  return t > 0.6 ? "#ffffff" : "#1f2937";
}
987
+
988
/**
 * Three aligned tables for paired model rows: left values, right values, and
 * their difference (right − left).
 *
 * Row `i` of every table corresponds to `pairs[i]`. Values are formatted like
 * the heatmap (three decimals for `corr*` metrics, percentages otherwise);
 * deltas carry an explicit `+` sign and use the diverging delta color scale.
 * A delta is shown as `-` when either side has no value.
 *
 * @param pairs - `[leftRowLabel, rightRowLabel]` for each comparison row.
 * @param leftData - Aggregated cells for the left-hand rows.
 * @param rightData - Aggregated cells for the right-hand rows.
 * @param colLabels - Eval variants to render as columns.
 * @param metric - Metric being shown (drives formatting and color scale).
 * @param title - Heading above the three tables.
 * @param leftLabel - Caption for the left table.
 * @param rightLabel - Caption for the right table.
 * @param fullConfigs - Row label → full training config, used as row tooltips.
 */
function SideBySideDelta({
  pairs,
  leftData,
  rightData,
  colLabels,
  metric,
  title,
  leftLabel,
  rightLabel,
  fullConfigs,
}: {
  pairs: [string, string][]; // [leftRowLabel, rightRowLabel]
  leftData: Map<string, Map<EvalVariant, AggCell>>;
  rightData: Map<string, Map<EvalVariant, AggCell>>;
  colLabels: EvalVariant[];
  metric: Metric;
  title: string;
  leftLabel: string;
  rightLabel: string;
  fullConfigs: Map<string, string>;
}) {
  const isCorr = metric.startsWith("corr");
  const formatVal = (v: number) => {
    if (isNaN(v)) return "-";
    return isCorr ? v.toFixed(3) : (v * 100).toFixed(1);
  };
  const formatDelta = (v: number) => {
    if (isNaN(v)) return "-";
    const pp = isCorr ? v : v * 100;
    const sign = pp > 0 ? "+" : "";
    return isCorr ? `${sign}${pp.toFixed(3)}` : `${sign}${pp.toFixed(1)}`;
  };
  const colorFn = isCorr ? corrColor : rdYlGn;

  const captionClass = "text-[10px] uppercase tracking-wider text-gray-500 font-medium mb-1";
  const valueCellClass = "text-center py-1.5 px-2 font-mono border border-gray-800/50";

  // Column headers shared by all three tables.
  const renderColumnHeaders = () =>
    colLabels.map((col) => (
      <th key={col} className="text-center py-1 px-3 text-gray-400 font-medium min-w-[60px]">{col}</th>
    ));

  // One value table (used for both the left and right side). Rows are keyed by
  // position as well as label so a label appearing in several pairs stays unique.
  const renderValueTable = (
    caption: string,
    labels: string[],
    source: Map<string, Map<EvalVariant, AggCell>>,
    modelHeaderClass: string,
    modelCellClass: string,
  ) => (
    <div>
      <p className={captionClass}>{caption}</p>
      <table className="border-collapse text-xs">
        <thead>
          <tr>
            <th className={modelHeaderClass}>Model</th>
            {renderColumnHeaders()}
          </tr>
        </thead>
        <tbody>
          {labels.map((rowLabel, rowIdx) => {
            const evMap = source.get(rowLabel);
            return (
              <tr key={`${rowIdx}:${rowLabel}`}>
                <td className={modelCellClass} title={fullConfigs.get(rowLabel) || rowLabel}><DisplayRowLabel label={rowLabel} /></td>
                {colLabels.map((col) => {
                  const val = evMap?.get(col)?.mean ?? NaN;
                  return (
                    <td key={col} className={valueCellClass} style={{ backgroundColor: colorFn(val), color: textColor(val, isCorr) }}>
                      {formatVal(val)}
                    </td>
                  );
                })}
              </tr>
            );
          })}
        </tbody>
      </table>
    </div>
  );

  return (
    <div className="flex flex-col gap-3">
      <h3 className="text-xs font-medium text-gray-300">{title}</h3>
      <div className="flex gap-6 overflow-x-auto">
        {/* Left heatmap */}
        {renderValueTable(
          leftLabel,
          pairs.map(([left]) => left),
          leftData,
          "text-left py-1 px-2 text-gray-500 font-normal min-w-[120px]",
          "py-1 px-2 text-gray-300 font-mono text-[11px]",
        )}

        {/* Right heatmap */}
        {renderValueTable(
          rightLabel,
          pairs.map(([, right]) => right),
          rightData,
          "text-left py-1 px-1.5 text-gray-500 font-normal min-w-[80px]",
          "py-1 px-1.5 text-gray-300 font-mono text-[9px] whitespace-nowrap",
        )}

        {/* Delta heatmap */}
        <div>
          <p className={captionClass}>Delta ({rightLabel} − {leftLabel})</p>
          <table className="border-collapse text-xs">
            <thead>
              <tr>
                <th className="text-left py-1 px-2 text-gray-500 font-normal min-w-[120px]">Model</th>
                {renderColumnHeaders()}
              </tr>
            </thead>
            <tbody>
              {pairs.map(([leftLabel_, rightLabel_], rowIdx) => {
                const leftEvMap = leftData.get(leftLabel_);
                const rightEvMap = rightData.get(rightLabel_);
                return (
                  <tr key={`${rowIdx}:${leftLabel_}`}>
                    {/* Delta rows are labelled with the left-hand model of the pair. */}
                    <td className="py-1 px-2 text-gray-300 font-mono text-[11px]"><DisplayRowLabel label={leftLabel_} /></td>
                    {colLabels.map((col) => {
                      const leftVal = leftEvMap?.get(col)?.mean ?? NaN;
                      const rightVal = rightEvMap?.get(col)?.mean ?? NaN;
                      const delta = (!isNaN(leftVal) && !isNaN(rightVal)) ? rightVal - leftVal : NaN;
                      return (
                        <td key={col} className={valueCellClass} style={{ backgroundColor: deltaColor(delta), color: deltaTextColor(delta) }}>
                          {formatDelta(delta)}
                        </td>
                      );
                    })}
                  </tr>
                );
              })}
            </tbody>
          </table>
        </div>
      </div>
    </div>
  );
}
1142
+
1143
+ // ---------------------------------------------------------------------------
1144
+ // Per-task collapsible section
1145
+ // ---------------------------------------------------------------------------
1146
+
1147
/**
 * Collapsible "per-task breakdown": one section per task, each holding one
 * heatmap per metric. Collapsed by default.
 *
 * Within a metric, only row labels present in `visibleRows` are shown, and a
 * metric with no visible rows renders nothing. Columns are the eval variants
 * that occur anywhere in that metric's data.
 *
 * @param sections - Per-task data: section label, task count, and aggregated
 *   data per metric.
 * @param fullConfigs - Row label → full training config, used as row tooltips.
 * @param visibleRows - Row labels currently selected for display.
 */
function PerTaskCollapsible({
  sections,
  fullConfigs,
  visibleRows,
}: {
  sections: { label: string; taskCount: number; metrics: { metric: Metric; data: Map<string, Map<EvalVariant, AggCell>> }[] }[];
  fullConfigs: Map<string, string>;
  visibleRows: Set<string>;
}) {
  const [open, setOpen] = useState(false);

  return (
    <div className="border-t border-gray-700 pt-4">
      <button
        // Explicit type: a bare <button> defaults to "submit" inside a <form>.
        type="button"
        // Expose the disclosure state, otherwise conveyed only by the rotated arrow.
        aria-expanded={open}
        onClick={() => setOpen((o) => !o)}
        className="flex items-center gap-2 text-sm font-semibold text-gray-300 hover:text-gray-100 transition-colors"
      >
        <span className={`text-xs transition-transform ${open ? "rotate-90" : ""}`}>▶</span>
        Per-task breakdown ({sections.length} tasks)
      </button>
      {open && (
        <div className="mt-4 space-y-10">
          {sections.map((section) => (
            <div key={section.label} className="space-y-4">
              <h3 className="text-sm font-medium text-cyan-300 border-b border-gray-800 pb-1">{section.label}</h3>
              <div className="flex flex-wrap gap-6">
                {section.metrics.map(({ metric: m, data }) => {
                  const metricRowLabels = sortRowLabels(Array.from(data.keys()).filter((l) => visibleRows.has(l)));
                  // Nothing to show for this metric: bail out before any further work.
                  if (metricRowLabels.length === 0) return null;

                  const metricLabel = METRICS.find((x) => x.key === m)?.label ?? m;
                  const metricEvs = EVAL_VARIANTS.filter((ev) => {
                    for (const evMap of data.values()) {
                      if (evMap.has(ev)) return true;
                    }
                    return false;
                  });
                  return (
                    <HeatmapGrid
                      key={m}
                      data={data}
                      rowLabels={metricRowLabels}
                      colLabels={metricEvs}
                      metric={m}
                      title={metricLabel}
                      fullConfigs={fullConfigs}
                    />
                  );
                })}
              </div>
            </div>
          ))}
        </div>
      )}
    </div>
  );
}
1203
+
1204
+ // ---------------------------------------------------------------------------
1205
+ // Per-task comparison collapsible (side-by-side + delta per task)
1206
+ // ---------------------------------------------------------------------------
1207
+
1208
/**
 * Collapsible "Per-task breakdown" panel for comparison presets.
 *
 * Starts collapsed and renders nothing at all when `filteredRows` covers
 * fewer than two distinct tasks. When expanded, each task gets a titled
 * group containing one `SideBySideDelta` per metric. For a given task and
 * metric, the task's rows are split with `leftFilter` / `rightFilter`,
 * each side is aggregated with `aggregateData(..., getRowLabel)`, and only
 * those `pairs` whose left label is present in the left aggregate and whose
 * right label is present in the right aggregate are shown. A metric with no
 * such pair is omitted.
 *
 * @param filteredRows Rows to break down by task.
 * @param pairs        Candidate [left row label, right row label] pairs.
 * @param leftLabel    Caption for the left side, forwarded to SideBySideDelta.
 * @param rightLabel   Caption for the right side, forwarded to SideBySideDelta.
 * @param leftFilter   Selects the rows that belong to the left side.
 * @param rightFilter  Selects the rows that belong to the right side.
 * @param fullConfigs  Row label -> full config string, forwarded unchanged.
 */
function PerTaskComparisonCollapsible({
  filteredRows,
  pairs,
  leftLabel,
  rightLabel,
  leftFilter,
  rightFilter,
  fullConfigs,
}: {
  filteredRows: SummaryRow[];
  pairs: [string, string][];
  leftLabel: string;
  rightLabel: string;
  leftFilter: (r: SummaryRow) => boolean;
  rightFilter: (r: SummaryRow) => boolean;
  fullConfigs: Map<string, string>;
}) {
  const [open, setOpen] = useState(false);

  // Bucket the rows by task in a single pass. This replaces re-scanning the
  // whole of `filteredRows` once per task during render; rows inside each
  // bucket keep their original relative order.
  const rowsByTask = useMemo(() => {
    const buckets = new Map<string, SummaryRow[]>();
    for (const row of filteredRows) {
      const bucket = buckets.get(row.task);
      if (bucket) bucket.push(row);
      else buckets.set(row.task, [row]);
    }
    return buckets;
  }, [filteredRows]);

  // Distinct task names in default (lexicographic) sort order.
  const tasks = useMemo(() => Array.from(rowsByTask.keys()).sort(), [rowsByTask]);

  // A per-task breakdown adds nothing when there is at most one task.
  // (Hooks above must stay before this early return.)
  if (tasks.length <= 1) return null;

  return (
    <div className="border-t border-gray-700 pt-4">
      <button
        onClick={() => setOpen((o) => !o)}
        className="flex items-center gap-2 text-sm font-semibold text-gray-300 hover:text-gray-100 transition-colors"
      >
        <span className={`text-xs transition-transform ${open ? "rotate-90" : ""}`}>▶</span>
        Per-task breakdown ({tasks.length} tasks)
      </button>
      {open && (
        <div className="mt-4 space-y-10">
          {tasks.map((task) => {
            const taskRows = rowsByTask.get(task) ?? [];
            const leftRows = taskRows.filter(leftFilter);
            const rightRows = taskRows.filter(rightFilter);
            return (
              <div key={task} className="space-y-4">
                <h3 className="text-sm font-medium text-cyan-300 border-b border-gray-800 pb-1">{task}</h3>
                <div className="space-y-6">
                  {METRICS.map((m) => {
                    const leftData = aggregateData(leftRows, m.key, getRowLabel).data;
                    const rightData = aggregateData(rightRows, m.key, getRowLabel).data;
                    // Keep only pairs that have data on both sides for this task.
                    const existingPairs = pairs.filter(([l, r]) => leftData.has(l) && rightData.has(r));
                    if (existingPairs.length === 0) return null;
                    // Columns: eval variants present on either side, in EVAL_VARIANTS order.
                    const evs = EVAL_VARIANTS.filter((ev) => {
                      for (const evMap of [...leftData.values(), ...rightData.values()]) {
                        if (evMap.has(ev)) return true;
                      }
                      return false;
                    });
                    return (
                      <SideBySideDelta
                        key={m.key}
                        pairs={existingPairs}
                        leftData={leftData}
                        rightData={rightData}
                        colLabels={evs}
                        metric={m.key}
                        title={m.label}
                        leftLabel={leftLabel}
                        rightLabel={rightLabel}
                        fullConfigs={fullConfigs}
                      />
                    );
                  })}
                </div>
              </div>
            );
          })}
        </div>
      )}
    </div>
  );
}
1288
+
1289
+ // ---------------------------------------------------------------------------
1290
+ // Main component
1291
+ // ---------------------------------------------------------------------------
1292
+
1293
+ export default function HeatmapViewer({
1294
+ datasetRepo,
1295
+ split: _split = "train",
1296
+ onClose,
1297
+ }: HeatmapViewerProps) {
1298
+ const fullRepo = datasetRepo.includes("/") ? datasetRepo : `${HF_ORG}/${datasetRepo}`;
1299
+ const shortName = datasetRepo.split("/").pop() ?? datasetRepo;
1300
+
1301
+ const { rows, loading, error, progress, refetch } = useDatasetRows(fullRepo, _split);
1302
+
1303
+ // --- Filter state ---
1304
+ const [selectedModel, setSelectedModel] = useState<string>("__first__");
1305
+ const [selectedFamily, setSelectedFamily] = useState<string>("hypernym");
1306
+ const [selectedTask, setSelectedTask] = useState<string>("__all__");
1307
+ const [selectedSplit, setSelectedSplit] = useState<string>("test");
1308
+ const [selectedTCType, setSelectedTCType] = useState<TCType>("self");
1309
+ const [selectedMetric, setSelectedMetric] = useState<Metric>("gen_roc");
1310
+ const [selectedDomain, setSelectedDomain] = useState<DomainFilter>("all");
1311
+ const [viewMode, setViewMode] = useState<ViewMode>("heatmap");
1312
+ const [barVariant, setBarVariant] = useState<EvalVariant>("raw");
1313
+ const [comparisonPreset, setComparisonPreset] = useState<ComparisonPreset>("all");
1314
+
1315
+ // Row visibility: null means "show all", otherwise explicit set from preset or manual toggle
1316
+ const [visibleRows, setVisibleRows] = useState<Set<string> | null>(null);
1317
+
1318
+ // --- Derived: available splits, tasks, families ---
1319
+ // --- Available base models (model column = base model identity, same for all finetuned variants) ---
1320
+ const availableModels = useMemo(() => {
1321
+ const s = new Set(rows.map((r) => r.model));
1322
+ return Array.from(s).sort();
1323
+ }, [rows]);
1324
+
1325
+ // Display-friendly name: "v6-google_gemma-2-2b" → "gemma-2-2b"
1326
+ const modelDisplayName = useCallback((m: string) => {
1327
+ return m.replace(/^v\d+-[^_]+_/, "");
1328
+ }, []);
1329
+
1330
+ // Resolve __first__ to actual first model once data loads
1331
+ const resolvedModel = useMemo(() => {
1332
+ if (selectedModel === "__first__" && availableModels.length > 0) return availableModels[0];
1333
+ if (availableModels.includes(selectedModel)) return selectedModel;
1334
+ return availableModels[0] ?? "";
1335
+ }, [selectedModel, availableModels]);
1336
+
1337
+ const availableSplits = useMemo(() => {
1338
+ const s = new Set(rows.map((r) => r.split));
1339
+ return Array.from(s).sort();
1340
+ }, [rows]);
1341
+
1342
+ const availableTasks = useMemo(() => {
1343
+ let filtered = rows.filter((r) => isValidEvalTask(r.task));
1344
+ const fam = TASK_FAMILIES.find((f) => f.key === selectedFamily);
1345
+ if (fam) filtered = filtered.filter((r) => r.task.startsWith(fam.prefix));
1346
+ const tasks = new Set(filtered.map((r) => r.task));
1347
+ return Array.from(tasks).sort();
1348
+ }, [rows, selectedFamily]);
1349
+
1350
+ // Reset task selection when family changes
1351
+ useEffect(() => {
1352
+ setSelectedTask("__all__");
1353
+ }, [selectedFamily]);
1354
+
1355
+ // --- Filtering pipeline ---
1356
+ const filteredRows = useMemo(() => {
1357
+ let result = rows;
1358
+
1359
+ // Model filter — critical: never mix different base models
1360
+ if (resolvedModel) {
1361
+ result = result.filter((r) => r.model === resolvedModel);
1362
+ }
1363
+
1364
+ // Exclude non-eval tasks (concat, bare family names, etc.)
1365
+ result = result.filter((r) => isValidEvalTask(r.task));
1366
+
1367
+ // Only show finetuned models trained on valid combined datasets (Setting U)
1368
+ result = result.filter((r) => isValidFinetunedModel(r));
1369
+
1370
+ // Split filter
1371
+ result = result.filter((r) => r.split === selectedSplit);
1372
+
1373
+ // TC type filter (single-select: each TC type is a different eval condition)
1374
+ result = result.filter((r) => getEvalTCType(r) === selectedTCType);
1375
+
1376
+ // For finetuned models trained with a specific TC, only show rows where eval TC matches training TC
1377
+ result = result.filter((r) => isMatchedTC(r));
1378
+
1379
+ // Family filter — also exclude finetuned models trained on a different family
1380
+ const fam = TASK_FAMILIES.find((f) => f.key === selectedFamily);
1381
+ if (fam) {
1382
+ result = result.filter((r) => r.task.startsWith(fam.prefix));
1383
+ result = result.filter((r) => {
1384
+ if (!r.finetuned) return true; // base models always shown
1385
+ const trainFam = getTrainingFamily(r.training_config);
1386
+ return trainFam === null || trainFam === selectedFamily;
1387
+ });
1388
+ }
1389
+
1390
+ // Specific task filter
1391
+ if (selectedTask !== "__all__") {
1392
+ result = result.filter((r) => r.task === selectedTask);
1393
+ }
1394
+
1395
+ // Domain filter (OOD vs in-domain)
1396
+ if (selectedDomain !== "all") {
1397
+ result = result.filter((r) => {
1398
+ const family = getTaskFamily(r.task);
1399
+ if (!family) return true;
1400
+ const ood = isOodTask(r.task, r.training_config, family);
1401
+ if (ood === null) return true;
1402
+ return selectedDomain === "ood" ? ood : !ood;
1403
+ });
1404
+ }
1405
+
1406
+ return result;
1407
+ }, [rows, resolvedModel, selectedSplit, selectedTCType, selectedFamily, selectedTask, selectedDomain]);
1408
+
1409
+ // --- Build row label → full config mapping ---
1410
+ const fullConfigs = useMemo(() => {
1411
+ const map = new Map<string, string>();
1412
+ for (const r of filteredRows) {
1413
+ const label = getRowLabel(r);
1414
+ if (!map.has(label)) {
1415
+ map.set(label, r.finetuned ? r.training_config : "Base (not finetuned)");
1416
+ }
1417
+ }
1418
+ return map;
1419
+ }, [filteredRows]);
1420
+
1421
+ // --- All available row labels (before visibility filter) ---
1422
+ const allRowLabels = useMemo(() => {
1423
+ const labels = new Set<string>();
1424
+ for (const r of filteredRows) labels.add(getRowLabel(r));
1425
+ return sortRowLabels(Array.from(labels));
1426
+ }, [filteredRows]);
1427
+
1428
+ // Effective visible rows: use explicit selection, preset, or show all
1429
+ const effectiveVisibleRows = useMemo(() => {
1430
+ const available = new Set(allRowLabels);
1431
+
1432
+ // If a comparison preset restricts rows, apply that
1433
+ if (comparisonPreset === "training-effect" && visibleRows === null) {
1434
+ const effective = new Set<string>();
1435
+ for (const label of available) {
1436
+ if (TRAINING_EFFECT_ROWS.has(label)) effective.add(label);
1437
+ }
1438
+ return effective.size > 0 ? effective : available;
1439
+ }
1440
+
1441
+ // Manual selection
1442
+ if (visibleRows !== null) {
1443
+ const effective = new Set<string>();
1444
+ for (const label of visibleRows) {
1445
+ if (available.has(label)) effective.add(label);
1446
+ }
1447
+ return effective.size > 0 ? effective : available;
1448
+ }
1449
+
1450
+ // Default: show all except hidden-by-default rows
1451
+ const effective = new Set<string>();
1452
+ for (const label of available) {
1453
+ if (!DEFAULT_HIDDEN_ROWS.has(label)) effective.add(label);
1454
+ }
1455
+ return effective.size > 0 ? effective : available;
1456
+ }, [visibleRows, allRowLabels, comparisonPreset]);
1457
+
1458
+ // --- Aggregation ---
1459
+ const aggResult = useMemo(() => {
1460
+ return aggregateData(filteredRows, selectedMetric, getRowLabel);
1461
+ }, [filteredRows, selectedMetric]);
1462
+
1463
+ const aggData = aggResult.data;
1464
+ const validationError = aggResult.validationError;
1465
+
1466
+ const rowLabels = useMemo(() => {
1467
+ return sortRowLabels(Array.from(aggData.keys()).filter((l) => effectiveVisibleRows.has(l)));
1468
+ }, [aggData, effectiveVisibleRows]);
1469
+
1470
+ // Available eval variants (only those with data)
1471
+ const availableEvalVariants = useMemo(() => {
1472
+ const evs = new Set<EvalVariant>();
1473
+ for (const evMap of aggData.values()) {
1474
+ for (const ev of evMap.keys()) evs.add(ev);
1475
+ }
1476
+ return EVAL_VARIANTS.filter((ev) => evs.has(ev));
1477
+ }, [aggData]);
1478
+
1479
+ // --- Stats ---
1480
+ const stats = useMemo(() => {
1481
+ const taskCount = new Set(filteredRows.map((r) => r.task)).size;
1482
+ const modelCount = new Set(filteredRows.map((r) => r.finetuned ? r.training_config : "base")).size;
1483
+ return { tasks: taskCount, models: modelCount, rows: filteredRows.length };
1484
+ }, [filteredRows]);
1485
+
1486
+ // --- Domain-split rows for aggregated heatmaps ---
1487
+ const domainSplitRows = useMemo(() => {
1488
+ const hasDomainSplit = (selectedFamily === "ifeval" || selectedFamily === "hypernym") && selectedTask === "__all__";
1489
+ if (!hasDomainSplit || selectedDomain !== "all") return null;
1490
+
1491
+ const oodRows = filteredRows.filter((r) => {
1492
+ const family = getTaskFamily(r.task);
1493
+ if (!family) return false;
1494
+ return isOodTask(r.task, r.training_config, family) === true;
1495
+ });
1496
+ const idRows = filteredRows.filter((r) => {
1497
+ const family = getTaskFamily(r.task);
1498
+ if (!family) return false;
1499
+ return isOodTask(r.task, r.training_config, family) === false;
1500
+ });
1501
+ const oodTaskCount = new Set(oodRows.map((r) => r.task)).size;
1502
+ const idTaskCount = new Set(idRows.map((r) => r.task)).size;
1503
+ return { oodRows, idRows, oodTaskCount, idTaskCount };
1504
+ }, [filteredRows, selectedFamily, selectedTask, selectedDomain]);
1505
+
1506
+ // --- Multi-metric heatmaps (one set per domain group when applicable) ---
1507
+ type HeatmapSection = {
1508
+ label: string;
1509
+ taskCount: number;
1510
+ metrics: { metric: Metric; data: Map<string, Map<EvalVariant, AggCell>> }[];
1511
+ };
1512
+
1513
+ const heatmapSections = useMemo((): HeatmapSection[] => {
1514
+ if (viewMode !== "heatmap") return [];
1515
+
1516
+ const buildSection = (sectionRows: SummaryRow[], label: string, taskCount: number): HeatmapSection => {
1517
+ const metrics: { metric: Metric; data: Map<string, Map<EvalVariant, AggCell>> }[] = [];
1518
+ for (const m of METRICS) {
1519
+ metrics.push({ metric: m.key, data: aggregateData(sectionRows, m.key, getRowLabel).data });
1520
+ }
1521
+ return { label, taskCount, metrics };
1522
+ };
1523
+
1524
+ if (domainSplitRows) {
1525
+ const sections: HeatmapSection[] = [];
1526
+ if (domainSplitRows.oodRows.length > 0) {
1527
+ sections.push(buildSection(domainSplitRows.oodRows, "Out-of-domain", domainSplitRows.oodTaskCount));
1528
+ }
1529
+ if (domainSplitRows.idRows.length > 0) {
1530
+ sections.push(buildSection(domainSplitRows.idRows, "In-domain", domainSplitRows.idTaskCount));
1531
+ }
1532
+ return sections;
1533
+ }
1534
+
1535
+ // For families without domain splits but with multiple tasks, still show aggregate
1536
+ return [buildSection(filteredRows, "All tasks", stats.tasks)];
1537
+ }, [filteredRows, viewMode, domainSplitRows, stats.tasks]);
1538
+
1539
+ // --- Per-task heatmaps (when viewing "All tasks" in a family) ---
1540
+ const perTaskSections = useMemo((): HeatmapSection[] => {
1541
+ if (viewMode !== "heatmap") return [];
1542
+ if (selectedTask !== "__all__") return []; // single task already shown in main sections
1543
+ const tasks = new Set(filteredRows.map((r) => r.task));
1544
+ if (tasks.size <= 1) return []; // no point showing per-task if only one
1545
+
1546
+ const sorted = Array.from(tasks).sort();
1547
+ return sorted.map((task) => {
1548
+ const taskRows = filteredRows.filter((r) => r.task === task);
1549
+ const metrics: { metric: Metric; data: Map<string, Map<EvalVariant, AggCell>> }[] = [];
1550
+ for (const m of METRICS) {
1551
+ metrics.push({ metric: m.key, data: aggregateData(taskRows, m.key, getRowLabel).data });
1552
+ }
1553
+ return { label: task, taskCount: 1, metrics };
1554
+ });
1555
+ }, [filteredRows, viewMode, selectedTask]);
1556
+
1557
+ // --- Comparison preset data ---
1558
+ // For delta comparisons, we need rows from multiple TC types simultaneously
1559
+ const comparisonBaseRows = useMemo(() => {
1560
+ // Same as filteredRows but without TC type filter
1561
+ let result = rows;
1562
+ // Model filter — must match filteredRows
1563
+ if (resolvedModel) {
1564
+ result = result.filter((r) => r.model === resolvedModel);
1565
+ }
1566
+ result = result.filter((r) => isValidEvalTask(r.task));
1567
+ result = result.filter((r) => isValidFinetunedModel(r));
1568
+ result = result.filter((r) => r.split === selectedSplit);
1569
+ // For finetuned models trained with a specific TC, only show rows where eval TC matches training TC
1570
+ result = result.filter((r) => isMatchedTC(r));
1571
+ const fam = TASK_FAMILIES.find((f) => f.key === selectedFamily);
1572
+ if (fam) {
1573
+ result = result.filter((r) => r.task.startsWith(fam.prefix));
1574
+ result = result.filter((r) => {
1575
+ if (!r.finetuned) return true;
1576
+ const trainFam = getTrainingFamily(r.training_config);
1577
+ return trainFam === null || trainFam === selectedFamily;
1578
+ });
1579
+ }
1580
+ if (selectedTask !== "__all__") {
1581
+ result = result.filter((r) => r.task === selectedTask);
1582
+ }
1583
+ if (selectedDomain !== "all") {
1584
+ result = result.filter((r) => {
1585
+ const family = getTaskFamily(r.task);
1586
+ if (!family) return true;
1587
+ const ood = isOodTask(r.task, r.training_config, family);
1588
+ if (ood === null) return true;
1589
+ return selectedDomain === "ood" ? ood : !ood;
1590
+ });
1591
+ }
1592
+ return result;
1593
+ }, [rows, resolvedModel, selectedSplit, selectedFamily, selectedTask, selectedDomain]);
1594
+
1595
+ // Build aggregated data for rows with a specific training TC flag (in the row label)
1596
+ // Note: this is the TRAINING tc flag (in the model name), NOT the eval TC type
1597
+ const aggByTrainingTC = useCallback((trainingTCFlag: string | null, metric: Metric) => {
1598
+ const subset = filteredRows.filter((r) => {
1599
+ const label = getRowLabel(r);
1600
+ if (trainingTCFlag === null) {
1601
+ // No training TC flag — exclude rows that have any TC flag
1602
+ return !TC_FLAGS.some((f) => hasTCFlag(label, f));
1603
+ }
1604
+ return hasTCFlag(label, trainingTCFlag);
1605
+ });
1606
+ return aggregateData(subset, metric, getRowLabel).data;
1607
+ }, [filteredRows]);
1608
+
1609
+ // Dynamic pairing: find rows that differ only by a TC flag
1610
+ const buildPairs = useCallback((leftFlag: string | null, rightFlag: string): [string, string][] => {
1611
+ const leftLabels = new Set<string>();
1612
+ const rightLabels = new Set<string>();
1613
+ for (const r of filteredRows) {
1614
+ const label = getRowLabel(r);
1615
+ if (label === "Base") continue;
1616
+ if (rightFlag && hasTCFlag(label, rightFlag)) {
1617
+ rightLabels.add(label);
1618
+ } else if (leftFlag === null && !TC_FLAGS.some((f) => hasTCFlag(label, f))) {
1619
+ leftLabels.add(label);
1620
+ } else if (leftFlag && hasTCFlag(label, leftFlag)) {
1621
+ leftLabels.add(label);
1622
+ }
1623
+ }
1624
+ // Match: strip TC flag from right label, see if it matches a left label
1625
+ const pairs: [string, string][] = [];
1626
+ for (const rl of rightLabels) {
1627
+ const stripped = stripTCFlag(rl);
1628
+ if (leftLabels.has(stripped)) {
1629
+ pairs.push([stripped, rl]);
1630
+ }
1631
+ }
1632
+ return sortRowLabels(pairs.map(([l]) => l)).map((l) => {
1633
+ const r = pairs.find(([left]) => left === l)![1];
1634
+ return [l, r] as [string, string];
1635
+ });
1636
+ }, [filteredRows]);
1637
+
1638
+ // + TC-Self pairs: without-tc ↔ tcself
1639
+ const tcSelfPairs = useMemo((): [string, string][] => {
1640
+ if (comparisonPreset !== "plus-tcself") return [];
1641
+ return buildPairs(null, "tcself");
1642
+ }, [comparisonPreset, buildPairs]);
1643
+
1644
+ // TC-Self vs TC-Neg: for each model, compare train-tcself+eval-self vs train-tcneg+eval-neg
1645
+ // Base model: same "Base" label, just different eval TC type
1646
+ // Finetuned: pair by stripping tcself/tcneg from training label
1647
+ const aggByMatchedTC = useCallback((evalTC: TCType, metric: Metric) => {
1648
+ // From comparisonBaseRows (no eval TC filter), get rows matching this eval TC type
1649
+ const subset = comparisonBaseRows.filter((r) => getEvalTCType(r) === evalTC);
1650
+ return aggregateData(subset, metric, getRowLabel).data;
1651
+ }, [comparisonBaseRows]);
1652
+
1653
+ const tcSelfVsNegPairs = useMemo((): [string, string][] => {
1654
+ if (comparisonPreset !== "tcself-vs-tcneg") return [];
1655
+ // Left: models evaluated with self-TC (base + models trained with tcself)
1656
+ const selfRows = comparisonBaseRows.filter((r) => getEvalTCType(r) === "self");
1657
+ const selfLabels = new Set(selfRows.map(getRowLabel));
1658
+ // Right: models evaluated with neg-TC (base + models trained with tcneg)
1659
+ const negRows = comparisonBaseRows.filter((r) => getEvalTCType(r) === "neg");
1660
+ const negLabels = new Set(negRows.map(getRowLabel));
1661
+
1662
+ const pairs: [string, string][] = [];
1663
+ // Base: appears in both with same label
1664
+ if (selfLabels.has("Base") && negLabels.has("Base")) {
1665
+ pairs.push(["Base", "Base"]);
1666
+ }
1667
+ // Finetuned: match tcself label ↔ tcneg label (strip tc flag to find pair)
1668
+ for (const sl of selfLabels) {
1669
+ if (sl === "Base") continue;
1670
+ if (!hasTCFlag(sl, "tcself")) continue;
1671
+ const negVersion = sl.replace("tcself", "tcneg");
1672
+ if (negLabels.has(negVersion)) {
1673
+ pairs.push([sl, negVersion]);
1674
+ }
1675
+ }
1676
+ return sortRowLabels(pairs.map(([l]) => l)).map((l) => {
1677
+ const p = pairs.find(([left]) => left === l)!;
1678
+ return p;
1679
+ });
1680
+ }, [comparisonPreset, comparisonBaseRows]);
1681
+
1682
+ // TC-Self vs TC-GPT2: pair tcself-trained (eval=self) with tco-trained (eval=gpt2)
1683
+ // e.g. "Comb-tcself-lo" ↔ "Comb-tco-lo"
1684
+ const tcSelfVsGpt2Pairs = useMemo((): [string, string][] => {
1685
+ if (comparisonPreset !== "tcself-vs-tcgpt2") return [];
1686
+ const selfRows = comparisonBaseRows.filter((r) => getEvalTCType(r) === "self");
1687
+ const selfLabels = new Set(selfRows.map(getRowLabel));
1688
+ const gpt2Rows = comparisonBaseRows.filter((r) => getEvalTCType(r) === "gpt2");
1689
+ const gpt2Labels = new Set(gpt2Rows.map(getRowLabel));
1690
+
1691
+ const pairs: [string, string][] = [];
1692
+ // Base: appears in both with same label
1693
+ if (selfLabels.has("Base") && gpt2Labels.has("Base")) {
1694
+ pairs.push(["Base", "Base"]);
1695
+ }
1696
+ // Finetuned: match tcself label ↔ tco label (swap tc flag to find pair)
1697
+ for (const sl of selfLabels) {
1698
+ if (sl === "Base") continue;
1699
+ if (!hasTCFlag(sl, "tcself")) continue;
1700
+ const tcoVersion = sl.replace("tcself", "tco");
1701
+ if (gpt2Labels.has(tcoVersion)) {
1702
+ pairs.push([sl, tcoVersion]);
1703
+ }
1704
+ }
1705
+ return sortRowLabels(pairs.map(([l]) => l)).map((l) => {
1706
+ const p = pairs.find(([left]) => left === l)!;
1707
+ return p;
1708
+ });
1709
+ }, [comparisonPreset, comparisonBaseRows]);
1710
+
1711
+ // Family options for dropdown
1712
+ const familyOptions = useMemo((): { key: string; label: string }[] => {
1713
+ const options: { key: string; label: string }[] = [];
1714
+ for (const fam of TASK_FAMILIES) {
1715
+ const count = new Set(rows.filter((r) => r.task.startsWith(fam.prefix)).map((r) => r.task)).size;
1716
+ if (count > 0) options.push({ key: fam.key, label: `${fam.label} (${count})` });
1717
+ }
1718
+ return options;
1719
+ }, [rows]);
1720
+
1721
+ // Task options for dropdown
1722
+ const taskOptions = useMemo(() => {
1723
+ const options = [{ key: "__all__", label: `All tasks (${availableTasks.length})` }];
1724
+ for (const t of availableTasks) {
1725
+ options.push({ key: t, label: t });
1726
+ }
1727
+ return options;
1728
+ }, [availableTasks]);
1729
+
1730
+ // Domain filter visibility
1731
+ const showDomainFilter = selectedFamily === "ifeval" || selectedFamily === "hypernym";
1732
+
1733
+ return (
1734
+ <div className="fixed inset-0 z-50 flex flex-col bg-gray-950">
1735
+ {/* Header */}
1736
+ <div className="flex items-center justify-between px-5 py-3 border-b border-gray-800 flex-shrink-0 bg-gray-900">
1737
+ <div className="flex items-center gap-3 min-w-0">
1738
+ <span className="text-sm font-semibold text-gray-200 truncate">{shortName}</span>
1739
+ <span className="text-xs text-gray-600 border border-gray-700 px-1.5 py-0.5 rounded">heatmap</span>
1740
+ {!loading && (
1741
+ <span className="text-xs text-gray-500">
1742
+ {rows.length.toLocaleString()} rows loaded
1743
+ </span>
1744
+ )}
1745
+ </div>
1746
+ <div className="flex items-center gap-2 flex-shrink-0">
1747
+ <a
1748
+ href={`https://huggingface.co/datasets/${fullRepo}`}
1749
+ target="_blank"
1750
+ rel="noopener noreferrer"
1751
+ className="text-xs text-gray-500 hover:text-cyan-400 transition-colors px-2 py-1 border border-gray-700 rounded"
1752
+ >
1753
+ HF
1754
+ </a>
1755
+ <button
1756
+ onClick={onClose}
1757
+ className="text-gray-400 hover:text-gray-200 transition-colors p-1 rounded hover:bg-gray-700"
1758
+ aria-label="Close viewer"
1759
+ >
1760
+ <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={2}>
1761
+ <path strokeLinecap="round" strokeLinejoin="round" d="M6 18L18 6M6 6l12 12" />
1762
+ </svg>
1763
+ </button>
1764
+ </div>
1765
+ </div>
1766
+
1767
+ {/* Loading */}
1768
+ {loading && (
1769
+ <div className="flex-1 flex items-center justify-center">
1770
+ <div className="flex flex-col items-center gap-3">
1771
+ <div className="w-6 h-6 border-2 border-cyan-500 border-t-transparent rounded-full animate-spin" />
1772
+ <p className="text-sm text-gray-400">
1773
+ Loading... {progress.loaded.toLocaleString()} / {progress.total.toLocaleString()} rows
1774
+ </p>
1775
+ </div>
1776
+ </div>
1777
+ )}
1778
+
1779
+ {/* Error */}
1780
+ {!loading && error && (
1781
+ <div className="flex-1 flex items-center justify-center">
1782
+ <div className="max-w-lg text-center space-y-3">
1783
+ <p className="text-sm font-medium text-red-400">Failed to load dataset</p>
1784
+ <p className="text-xs text-gray-500 font-mono break-words bg-gray-800 rounded p-3">{error}</p>
1785
+ <button
1786
+ onClick={refetch}
1787
+ className="text-xs text-cyan-400 hover:text-cyan-300 border border-cyan-700/50 px-3 py-1 rounded transition-colors"
1788
+ >
1789
+ Retry
1790
+ </button>
1791
+ </div>
1792
+ </div>
1793
+ )}
1794
+
1795
+ {/* Main content */}
1796
+ {!loading && !error && rows.length > 0 && (
1797
+ <div className="flex-1 flex overflow-hidden">
1798
+ {/* Controls sidebar */}
1799
+ <div className="w-64 flex-shrink-0 border-r border-gray-800 bg-gray-900/50 overflow-y-auto p-4 space-y-4">
1800
+ {/* View mode */}
1801
+ <div className="flex gap-1">
1802
+ <button
1803
+ onClick={() => setViewMode("heatmap")}
1804
+ className={`flex-1 text-xs py-1.5 rounded border transition-colors ${
1805
+ viewMode === "heatmap"
1806
+ ? "bg-cyan-800/60 text-cyan-200 border-cyan-600/60"
1807
+ : "bg-gray-800 text-gray-500 border-gray-700"
1808
+ }`}
1809
+ >
1810
+ Heatmap
1811
+ </button>
1812
+ <button
1813
+ onClick={() => setViewMode("bar")}
1814
+ className={`flex-1 text-xs py-1.5 rounded border transition-colors ${
1815
+ viewMode === "bar"
1816
+ ? "bg-cyan-800/60 text-cyan-200 border-cyan-600/60"
1817
+ : "bg-gray-800 text-gray-500 border-gray-700"
1818
+ }`}
1819
+ >
1820
+ Bar Plot
1821
+ </button>
1822
+ </div>
1823
+
1824
+ {availableModels.length > 1 && (
1825
+ <Dropdown
1826
+ label="Base Model"
1827
+ value={resolvedModel}
1828
+ options={availableModels.map((m) => ({ key: m, label: modelDisplayName(m) }))}
1829
+ onChange={setSelectedModel}
1830
+ />
1831
+ )}
1832
+
1833
+ <Dropdown
1834
+ label="Task Family"
1835
+ value={selectedFamily}
1836
+ options={familyOptions}
1837
+ onChange={setSelectedFamily}
1838
+ />
1839
+
1840
+ <Dropdown
1841
+ label="Task"
1842
+ value={selectedTask}
1843
+ options={taskOptions}
1844
+ onChange={setSelectedTask}
1845
+ />
1846
+
1847
+ <Dropdown
1848
+ label="Split"
1849
+ value={selectedSplit}
1850
+ options={availableSplits.map((s) => ({ key: s, label: s }))}
1851
+ onChange={setSelectedSplit}
1852
+ />
1853
+
1854
+ <Dropdown
1855
+ label="Eval TC Type"
1856
+ value={selectedTCType}
1857
+ options={TC_TYPES}
1858
+ onChange={setSelectedTCType}
1859
+ />
1860
+
1861
+ {showDomainFilter && (
1862
+ <Dropdown
1863
+ label="Domain"
1864
+ value={selectedDomain}
1865
+ options={[
1866
+ { key: "all" as DomainFilter, label: "All" },
1867
+ { key: "ood" as DomainFilter, label: "Out-of-domain" },
1868
+ { key: "in-domain" as DomainFilter, label: "In-domain" },
1869
+ ]}
1870
+ onChange={setSelectedDomain}
1871
+ />
1872
+ )}
1873
+
1874
+ <Dropdown
1875
+ label="Comparison"
1876
+ value={comparisonPreset}
1877
+ options={COMPARISON_PRESETS}
1878
+ onChange={setComparisonPreset}
1879
+ />
1880
+
1881
+ {/* Row visibility */}
1882
+ {allRowLabels.length > 0 && (
1883
+ <div className="flex flex-col gap-1">
1884
+ <div className="flex items-center justify-between">
1885
+ <label className="text-[10px] uppercase tracking-wider text-gray-500 font-medium">Visible Models</label>
1886
+ <div className="flex gap-1">
1887
+ <button
1888
+ onClick={() => setVisibleRows(new Set(allRowLabels))}
1889
+ className="text-[9px] text-gray-500 hover:text-gray-300 px-1"
1890
+ >
1891
+ all
1892
+ </button>
1893
+ <button
1894
+ onClick={() => setVisibleRows(null)}
1895
+ className="text-[9px] text-gray-500 hover:text-gray-300 px-1"
1896
+ >
1897
+ default
1898
+ </button>
1899
+ </div>
1900
+ </div>
1901
+ <div className="flex flex-col gap-0.5 max-h-48 overflow-y-auto">
1902
+ {allRowLabels.map((label) => (
1903
+ <label key={label} className="flex items-center gap-1.5 text-xs cursor-pointer hover:bg-gray-800/50 px-1 py-0.5 rounded">
1904
+ <input
1905
+ type="checkbox"
1906
+ checked={effectiveVisibleRows.has(label)}
1907
+ onChange={() => {
1908
+ const next = new Set(effectiveVisibleRows);
1909
+ if (next.has(label)) next.delete(label);
1910
+ else next.add(label);
1911
+ setVisibleRows(next);
1912
+ }}
1913
+ className="rounded border-gray-600 bg-gray-800 text-cyan-500 focus:ring-0 focus:ring-offset-0 h-3 w-3"
1914
+ />
1915
+ <span className={effectiveVisibleRows.has(label) ? "text-gray-200" : "text-gray-500"}><DisplayRowLabel label={label} /></span>
1916
+ </label>
1917
+ ))}
1918
+ </div>
1919
+ </div>
1920
+ )}
1921
+
1922
+
1923
+ {/* Stats */}
1924
+ <div className="pt-2 border-t border-gray-800 space-y-1">
1925
+ <p className="text-[10px] uppercase tracking-wider text-gray-500 font-medium">Filtered data</p>
1926
+ <p className="text-xs text-gray-400">{stats.tasks} tasks, {stats.models} configs</p>
1927
+ <p className="text-xs text-gray-400">{stats.rows.toLocaleString()} rows</p>
1928
+ {selectedTask === "__all__" && stats.tasks > 1 && (
1929
+ <p className="text-[10px] text-gray-600 italic">Averaging across {stats.tasks} tasks</p>
1930
+ )}
1931
+ </div>
1932
+ </div>
1933
+
1934
+ {/* Visualization area */}
1935
+ <div className="flex-1 overflow-auto p-6 space-y-8">
1936
+ {validationError ? (
1937
+ <div className="flex items-center justify-center h-full">
1938
+ <div className="max-w-xl bg-red-950 border-2 border-red-500 rounded-lg p-6 space-y-3">
1939
+ <div className="flex items-center gap-2">
1940
+ <span className="text-2xl">!!!</span>
1941
+ <h2 className="text-lg font-bold text-red-300">Data Integrity Violation</h2>
1942
+ </div>
1943
+ <pre className="text-sm text-red-200 whitespace-pre-wrap font-mono leading-relaxed">{validationError}</pre>
1944
+ <p className="text-xs text-red-400 mt-4">
1945
+ All visualizations are blocked until this is resolved. If you see this, the data pipeline has a bug
1946
+ — different models or training configs are collapsing into the same row label.
1947
+ </p>
1948
+ </div>
1949
+ </div>
1950
+ ) : filteredRows.length === 0 ? (
1951
+ <div className="flex items-center justify-center h-full">
1952
+ <p className="text-sm text-gray-500 italic">No data matches the current filters.</p>
1953
+ </div>
1954
+ ) : viewMode === "heatmap" ? (
1955
+ <>
1956
+ {/* Aggregated sections (domain-split or single aggregate) — hidden when comparison preset is active */}
1957
+ {(comparisonPreset === "all" || comparisonPreset === "training-effect") && heatmapSections.map((section, sIdx) => (
1958
+ <div key={`agg-${sIdx}`} className="space-y-6">
1959
+ {section.label && (
1960
+ <h2 className="text-sm font-semibold text-gray-200 border-b border-gray-700 pb-2">
1961
+ {section.label}
1962
+ {section.taskCount > 1 && (
1963
+ <span className="text-gray-500 font-normal ml-2">(mean over {section.taskCount} tasks)</span>
1964
+ )}
1965
+ </h2>
1966
+ )}
1967
+ <div className="flex flex-wrap gap-6">
1968
+ {section.metrics.map(({ metric: m, data }) => {
1969
+ const metricLabel = METRICS.find((x) => x.key === m)?.label ?? m;
1970
+ const metricRowLabels = sortRowLabels(Array.from(data.keys()).filter((l) => effectiveVisibleRows.has(l)));
1971
+ const metricEvs = EVAL_VARIANTS.filter((ev) => {
1972
+ for (const evMap of data.values()) {
1973
+ if (evMap.has(ev)) return true;
1974
+ }
1975
+ return false;
1976
+ });
1977
+ if (metricRowLabels.length === 0) return null;
1978
+ return (
1979
+ <HeatmapGrid
1980
+ key={m}
1981
+ data={data}
1982
+ rowLabels={metricRowLabels}
1983
+ colLabels={metricEvs}
1984
+ metric={m}
1985
+ title={metricLabel}
1986
+ fullConfigs={fullConfigs}
1987
+ />
1988
+ );
1989
+ })}
1990
+ </div>
1991
+ </div>
1992
+ ))}
1993
+
1994
+ {/* Comparison: + TC-Self (side-by-side + delta) */}
1995
+ {comparisonPreset === "plus-tcself" && tcSelfPairs.length === 0 && (
1996
+ <p className="text-sm text-gray-500 italic">No matched pairs found for + TC-Self comparison. Check that both non-TC and tcself models exist for this family.</p>
1997
+ )}
1998
+ {comparisonPreset === "plus-tcself" && tcSelfPairs.length > 0 && METRICS.map((m) => {
1999
+ const leftData = aggByTrainingTC(null, m.key);
2000
+ const rightData = aggByTrainingTC("tcself", m.key);
2001
+ const existingPairs = tcSelfPairs.filter(([l, r]) => leftData.has(l) && rightData.has(r));
2002
+ if (existingPairs.length === 0) return null;
2003
+ const evs = EVAL_VARIANTS.filter((ev) => {
2004
+ for (const evMap of [...leftData.values(), ...rightData.values()]) {
2005
+ if (evMap.has(ev)) return true;
2006
+ }
2007
+ return false;
2008
+ });
2009
+ return (
2010
+ <SideBySideDelta
2011
+ key={`tcself-${m.key}`}
2012
+ pairs={existingPairs}
2013
+ leftData={leftData}
2014
+ rightData={rightData}
2015
+ colLabels={evs}
2016
+ metric={m.key}
2017
+ title={m.label}
2018
+ leftLabel="Without TC"
2019
+ rightLabel="+ TC-Self"
2020
+ fullConfigs={fullConfigs}
2021
+ />
2022
+ );
2023
+ })}
2024
+
2025
+ {/* Comparison: TC-Self vs TC-Neg (side-by-side + delta) */}
2026
+ {comparisonPreset === "tcself-vs-tcneg" && tcSelfVsNegPairs.length === 0 && (
2027
+ <p className="text-sm text-gray-500 italic">No matched pairs found for TC-Self vs TC-Neg comparison. Check that both tcself and tcneg models exist for this family.</p>
2028
+ )}
2029
+ {comparisonPreset === "tcself-vs-tcneg" && tcSelfVsNegPairs.length > 0 && METRICS.map((m) => {
2030
+ const leftData = aggByMatchedTC("self", m.key);
2031
+ const rightData = aggByMatchedTC("neg", m.key);
2032
+ const existingPairs = tcSelfVsNegPairs.filter(([l, r]) => leftData.has(l) && rightData.has(r));
2033
+ if (existingPairs.length === 0) return null;
2034
+ const evs = EVAL_VARIANTS.filter((ev) => {
2035
+ for (const evMap of [...leftData.values(), ...rightData.values()]) {
2036
+ if (evMap.has(ev)) return true;
2037
+ }
2038
+ return false;
2039
+ });
2040
+ return (
2041
+ <SideBySideDelta
2042
+ key={`tcneg-${m.key}`}
2043
+ pairs={existingPairs}
2044
+ leftData={leftData}
2045
+ rightData={rightData}
2046
+ colLabels={evs}
2047
+ metric={m.key}
2048
+ title={m.label}
2049
+ leftLabel="TC-Self"
2050
+ rightLabel="TC-Neg"
2051
+ fullConfigs={fullConfigs}
2052
+ />
2053
+ );
2054
+ })}
2055
+
2056
+ {/* Comparison: TC-Self vs TC-GPT2 (side-by-side + delta) */}
2057
+ {comparisonPreset === "tcself-vs-tcgpt2" && tcSelfVsGpt2Pairs.length === 0 && (
2058
+ <p className="text-sm text-gray-500 italic">No matched pairs found for TC-Self vs TC-GPT2 comparison.</p>
2059
+ )}
2060
+ {comparisonPreset === "tcself-vs-tcgpt2" && tcSelfVsGpt2Pairs.length > 0 && METRICS.map((m) => {
2061
+ const leftData = aggByMatchedTC("self", m.key);
2062
+ const rightData = aggByMatchedTC("gpt2", m.key);
2063
+ const existingPairs = tcSelfVsGpt2Pairs.filter(([l, r]) => leftData.has(l) && rightData.has(r));
2064
+ if (existingPairs.length === 0) return null;
2065
+ const evs = EVAL_VARIANTS.filter((ev) => {
2066
+ for (const evMap of [...leftData.values(), ...rightData.values()]) {
2067
+ if (evMap.has(ev)) return true;
2068
+ }
2069
+ return false;
2070
+ });
2071
+ return (
2072
+ <SideBySideDelta
2073
+ key={`tcgpt2-${m.key}`}
2074
+ pairs={existingPairs}
2075
+ leftData={leftData}
2076
+ rightData={rightData}
2077
+ colLabels={evs}
2078
+ metric={m.key}
2079
+ title={m.label}
2080
+ leftLabel="TC-Self"
2081
+ rightLabel="TC-GPT2"
2082
+ fullConfigs={fullConfigs}
2083
+ />
2084
+ );
2085
+ })}
2086
+
2087
+ {/* Per-task breakdown — regular heatmap for all/training-effect */}
2088
+ {(comparisonPreset === "all" || comparisonPreset === "training-effect") && perTaskSections.length > 0 && (
2089
+ <PerTaskCollapsible sections={perTaskSections} fullConfigs={fullConfigs} visibleRows={effectiveVisibleRows} />
2090
+ )}
2091
+
2092
+ {/* Per-task breakdown — side-by-side + delta for + TC-Self */}
2093
+ {comparisonPreset === "plus-tcself" && selectedTask === "__all__" && (
2094
+ <PerTaskComparisonCollapsible
2095
+ filteredRows={filteredRows}
2096
+ pairs={tcSelfPairs}
2097
+ leftLabel="Without TC"
2098
+ rightLabel="+ TC-Self"
2099
+ leftFilter={(r) => {
2100
+ const label = getRowLabel(r);
2101
+ return !TC_FLAGS.some((f) => hasTCFlag(label, f));
2102
+ }}
2103
+ rightFilter={(r) => hasTCFlag(getRowLabel(r), "tcself")}
2104
+ fullConfigs={fullConfigs}
2105
+ />
2106
+ )}
2107
+
2108
+ {/* Per-task breakdown — side-by-side + delta for TC-Self vs TC-Neg */}
2109
+ {comparisonPreset === "tcself-vs-tcneg" && selectedTask === "__all__" && (
2110
+ <PerTaskComparisonCollapsible
2111
+ filteredRows={comparisonBaseRows}
2112
+ pairs={tcSelfVsNegPairs}
2113
+ leftLabel="TC-Self (eval)"
2114
+ rightLabel="TC-Neg (eval)"
2115
+ leftFilter={(r) => getEvalTCType(r) === "self"}
2116
+ rightFilter={(r) => getEvalTCType(r) === "neg"}
2117
+ fullConfigs={fullConfigs}
2118
+ />
2119
+ )}
2120
+
2121
+ {/* Per-task breakdown — side-by-side + delta for TC-Self vs TC-GPT2 */}
2122
+ {comparisonPreset === "tcself-vs-tcgpt2" && selectedTask === "__all__" && (
2123
+ <PerTaskComparisonCollapsible
2124
+ filteredRows={comparisonBaseRows}
2125
+ pairs={tcSelfVsGpt2Pairs}
2126
+ leftLabel="TC-Self (eval)"
2127
+ rightLabel="TC-GPT2 (eval)"
2128
+ leftFilter={(r) => getEvalTCType(r) === "self"}
2129
+ rightFilter={(r) => getEvalTCType(r) === "gpt2"}
2130
+ fullConfigs={fullConfigs}
2131
+ />
2132
+ )}
2133
+ </>
2134
+ ) : (
2135
+ <div className="space-y-8">
2136
+ {METRICS.map((m) => {
2137
+ const metricAgg = aggregateData(filteredRows, m.key, getRowLabel).data;
2138
+ const metricRowLabels = sortRowLabels(Array.from(metricAgg.keys()).filter((l) => effectiveVisibleRows.has(l)));
2139
+ if (metricRowLabels.length === 0) return null;
2140
+ return (
2141
+ <BarChart
2142
+ key={m.key}
2143
+ data={metricAgg}
2144
+ rowLabels={metricRowLabels}
2145
+ evalVariants={availableEvalVariants.filter((ev) => ev === "raw" || ev === "tc")}
2146
+ metric={m.key}
2147
+ title={`${m.label}${selectedTask === "__all__" && stats.tasks > 1 ? ` (mean ± SE over ${stats.tasks} tasks)` : ""}`}
2148
+ />
2149
+ );
2150
+ })}
2151
+ </div>
2152
+ )}
2153
+ </div>
2154
+ </div>
2155
+ )}
2156
+ </div>
2157
+ );
2158
+ }
frontend/src/experiments/components/viewers/ImageViewer.tsx ADDED
@@ -0,0 +1,319 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback } from "react";
2
+ import { HF_ORG } from "../../../config";
3
+
4
+ interface ImageViewerProps {
5
+ datasetRepo: string;
6
+ split?: string;
7
+ onClose: () => void;
8
+ }
9
+
10
+ interface ImageRow {
11
+ src: string;
12
+ caption: string;
13
+ rowIndex: number;
14
+ }
15
+
16
/**
 * Resolve an image URL from a cell value returned by the datasets-server API.
 *
 * Accepted shapes:
 *  - a string that (after trimming) starts with `http://`, `https://` or `/`;
 *  - an object carrying a non-blank string `src` or `url` (checked in that order);
 *  - an object whose string `path` starts with `http`.
 *
 * @param value Raw cell value of unknown shape.
 * @returns The URL, or `null` when the value does not look like an image reference.
 */
function extractImageUrl(value: unknown): string | null {
  if (!value) return null;

  if (typeof value === "string") {
    const trimmed = value.trim();
    const looksLikeUrl =
      trimmed.startsWith("http://") || trimmed.startsWith("https://") || trimmed.startsWith("/");
    return looksLikeUrl ? trimmed : null;
  }

  if (typeof value === "object") {
    const obj = value as Record<string, unknown>;

    // A blank `src`/`url` is treated as absent: returning "" here would make the
    // caller's `!== null` column detection accept a column that has no usable URL,
    // and would shadow a valid fallback key on the same object.
    for (const key of ["src", "url"]) {
      const candidate = obj[key];
      if (typeof candidate === "string" && candidate.trim() !== "") return candidate;
    }

    const path = obj.path;
    if (typeof path === "string" && path.startsWith("http")) return path;
  }

  return null;
}
35
+
36
/**
 * Look up a caption for a row by trying each candidate key in order.
 *
 * A key wins when its value is a non-blank string (returned trimmed) or a
 * number (returned via `String`). Values of any other type are skipped.
 *
 * @param row  Row object keyed by column name.
 * @param keys Candidate column names, in priority order.
 * @returns The caption text, or `null` when no key yields one.
 */
function pickCaption(row: Record<string, unknown>, keys: string[]): string | null {
  for (const columnName of keys) {
    const candidate = row[columnName];
    switch (typeof candidate) {
      case "string": {
        const text = candidate.trim();
        if (text) return text;
        break;
      }
      case "number":
        return String(candidate);
      default:
        break;
    }
  }
  return null;
}
45
+
46
/**
 * Full-screen gallery showing images from the first 20 rows of a dataset split,
 * fetched from the Hugging Face datasets-server `/rows` endpoint.
 *
 * The image column is the first column whose first-row value resolves through
 * `extractImageUrl`; failing that, the first column whose name contains one of a
 * few hints ("image", "img", "url", ...). Clicking a thumbnail opens a lightbox
 * that supports Escape / ArrowLeft / ArrowRight.
 *
 * @param datasetRepo Dataset id; `HF_ORG/` is prepended when it has no org prefix.
 * @param split       Split to read (defaults to "train").
 * @param onClose     Invoked when the header close button is clicked.
 */
export default function ImageViewer({ datasetRepo, split = "train", onClose }: ImageViewerProps) {
  // Ensure dataset repo has org prefix for HF API calls
  const fullRepo = datasetRepo.includes("/") ? datasetRepo : `${HF_ORG}/${datasetRepo}`;

  const [images, setImages] = useState<ImageRow[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [noImageColumns, setNoImageColumns] = useState(false);

  // Lightbox state
  const [lightboxIndex, setLightboxIndex] = useState<number | null>(null);

  // ---- Data loading ----
  useEffect(() => {
    // Set by the cleanup so a response for an outdated repo/split is ignored.
    let cancelled = false;

    async function fetchImages() {
      // Reset everything derived from the previous repo/split. Without this, a
      // stale gallery (and an open lightbox) from the old dataset could remain
      // visible next to the new dataset's error / "no image columns" message.
      setLoading(true);
      setError(null);
      setNoImageColumns(false);
      setImages([]);
      setLightboxIndex(null);

      try {
        const url = `https://datasets-server.huggingface.co/rows?dataset=${encodeURIComponent(fullRepo)}&config=default&split=${encodeURIComponent(split)}&offset=0&length=20`;
        const res = await fetch(url);
        if (!res.ok) {
          throw new Error(`Datasets server returned ${res.status}: ${res.statusText}`);
        }
        const data = await res.json();

        const rows: Record<string, unknown>[] = data.rows?.map(
          (r: { row: Record<string, unknown> }) => r.row
        ) ?? [];

        if (rows.length === 0) {
          if (!cancelled) {
            setImages([]);
            setLoading(false);
          }
          return;
        }

        const firstRow = rows[0];
        const columnNames = Object.keys(firstRow);

        // Pass 1: first column whose first-row value directly resolves to an image URL.
        let imageCol = columnNames.find((col) => extractImageUrl(firstRow[col]) !== null);

        // Pass 2: otherwise fall back to the first column with a suggestive name,
        // trying the hints in priority order.
        if (imageCol === undefined) {
          const hints = ["image", "img", "image_url", "url", "path", "file"];
          for (const hint of hints) {
            const match = columnNames.find((c) => c.toLowerCase().includes(hint));
            if (match) {
              imageCol = match;
              break;
            }
          }
        }

        if (imageCol === undefined) {
          if (!cancelled) {
            setNoImageColumns(true);
            setLoading(false);
          }
          return;
        }

        // Caption columns (first match wins per row)
        const captionKeys = ["caption", "description", "label", "title", "text", "name"];

        const chosenCol = imageCol;
        const extracted: ImageRow[] = [];

        rows.forEach((row, idx) => {
          const src = extractImageUrl(row[chosenCol]);
          if (src) {
            const caption = pickCaption(row, captionKeys) ?? `Image ${idx + 1}`;
            extracted.push({ src, caption, rowIndex: idx });
          }
        });

        if (!cancelled) {
          // Rows exist but the (name-hinted) column never produced a URL: report
          // that as "no image columns" rather than the misleading "no rows".
          if (extracted.length === 0) {
            setNoImageColumns(true);
          }
          setImages(extracted);
          setLoading(false);
        }
      } catch (err: unknown) {
        if (!cancelled) {
          setError(err instanceof Error ? err.message : "Failed to load images");
          setLoading(false);
        }
      }
    }

    fetchImages();
    return () => {
      cancelled = true;
    };
  }, [fullRepo, split]);

  // ---- Lightbox keyboard navigation ----
  const closeLightbox = useCallback(() => setLightboxIndex(null), []);

  // Both navigators wrap around; with no images they close the lightbox instead
  // of producing an out-of-range index (-1 / NaN).
  const prevImage = useCallback(() => {
    setLightboxIndex((i) => {
      if (images.length === 0) return null;
      return i === null || i === 0 ? images.length - 1 : i - 1;
    });
  }, [images.length]);

  const nextImage = useCallback(() => {
    setLightboxIndex((i) => {
      if (images.length === 0) return null;
      return i === null ? 0 : (i + 1) % images.length;
    });
  }, [images.length]);

  useEffect(() => {
    // Keyboard shortcuts are only active while the lightbox is open.
    if (lightboxIndex === null) return;
    const handler = (e: KeyboardEvent) => {
      if (e.key === "Escape") closeLightbox();
      if (e.key === "ArrowLeft") prevImage();
      if (e.key === "ArrowRight") nextImage();
    };
    window.addEventListener("keydown", handler);
    return () => window.removeEventListener("keydown", handler);
  }, [lightboxIndex, closeLightbox, prevImage, nextImage]);

  const shortName = datasetRepo.split("/").pop() ?? datasetRepo;

  // ---- Render ----
  return (
    <div className="fixed inset-0 z-50 flex flex-col bg-gray-900">
      {/* Header */}
      <div className="flex items-center justify-between px-5 py-3 border-b border-gray-800 flex-shrink-0">
        <div className="flex items-center gap-3 min-w-0">
          <span className="text-sm font-semibold text-gray-200 truncate">{shortName}</span>
          {datasetRepo.includes("/") && (
            <span className="text-xs text-gray-600 truncate">{datasetRepo.split("/")[0]}/</span>
          )}
          {!loading && !error && !noImageColumns && (
            <span className="text-xs text-gray-500">{images.length} images</span>
          )}
        </div>
        <button
          onClick={onClose}
          className="text-gray-400 hover:text-gray-200 transition-colors ml-4 flex-shrink-0 text-xl leading-none"
          aria-label="Close"
        >
          &times;
        </button>
      </div>

      {/* Body */}
      <div className="flex-1 overflow-y-auto p-5">
        {loading && (
          <div className="flex items-center justify-center h-48">
            <span className="text-sm text-gray-500 animate-pulse">Loading images...</span>
          </div>
        )}

        {!loading && error && (
          <div className="flex items-center justify-center h-48">
            <div className="text-center">
              <p className="text-sm text-red-400 mb-1">Failed to load dataset</p>
              <p className="text-xs text-gray-500">{error}</p>
            </div>
          </div>
        )}

        {!loading && !error && noImageColumns && (
          <div className="flex items-center justify-center h-48">
            <p className="text-sm text-gray-500 italic">No image columns found in this dataset.</p>
          </div>
        )}

        {!loading && !error && !noImageColumns && images.length === 0 && (
          <div className="flex items-center justify-center h-48">
            <p className="text-sm text-gray-500 italic">No rows found in this split.</p>
          </div>
        )}

        {!loading && !error && images.length > 0 && (
          <div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-4 gap-3">
            {images.map((img, idx) => (
              <button
                key={img.rowIndex}
                onClick={() => setLightboxIndex(idx)}
                className="group flex flex-col bg-gray-800 rounded-lg overflow-hidden border border-gray-700 hover:border-cyan-600 transition-colors text-left"
              >
                <div className="aspect-square overflow-hidden bg-gray-900 flex items-center justify-center">
                  <img
                    src={img.src}
                    alt={img.caption}
                    className="w-full h-full object-cover group-hover:opacity-90 transition-opacity"
                    loading="lazy"
                    onError={(e) => {
                      // Hide thumbnails that fail to load instead of showing a broken-image icon.
                      (e.currentTarget as HTMLImageElement).style.display = "none";
                    }}
                  />
                </div>
                <div className="px-2 py-1.5">
                  <p className="text-xs text-gray-400 truncate" title={img.caption}>
                    {img.caption}
                  </p>
                </div>
              </button>
            ))}
          </div>
        )}
      </div>

      {/* Lightbox overlay */}
      {lightboxIndex !== null && images[lightboxIndex] && (
        <div
          className="fixed inset-0 z-60 flex items-center justify-center bg-black/85"
          onClick={closeLightbox}
        >
          {/* Close */}
          <button
            className="absolute top-4 right-5 text-gray-300 hover:text-white text-3xl leading-none z-10"
            onClick={closeLightbox}
            aria-label="Close lightbox"
          >
            &times;
          </button>

          {/* Prev arrow */}
          {images.length > 1 && (
            <button
              className="absolute left-4 text-gray-300 hover:text-white text-4xl leading-none z-10 px-2 py-4"
              onClick={(e) => { e.stopPropagation(); prevImage(); }}
              aria-label="Previous image"
            >
              &#8249;
            </button>
          )}

          {/* Image (clicks inside must not bubble to the backdrop and close the lightbox) */}
          <div
            className="max-w-[90vw] max-h-[90vh] flex flex-col items-center gap-3"
            onClick={(e) => e.stopPropagation()}
          >
            <img
              src={images[lightboxIndex].src}
              alt={images[lightboxIndex].caption}
              className="max-w-full max-h-[80vh] object-contain rounded shadow-2xl"
            />
            <p className="text-sm text-gray-300 text-center max-w-lg px-4">
              {images[lightboxIndex].caption}
            </p>
            {images.length > 1 && (
              <p className="text-xs text-gray-600">
                {lightboxIndex + 1} / {images.length}
              </p>
            )}
          </div>

          {/* Next arrow */}
          {images.length > 1 && (
            <button
              className="absolute right-4 text-gray-300 hover:text-white text-4xl leading-none z-10 px-2 py-4"
              onClick={(e) => { e.stopPropagation(); nextImage(); }}
              aria-label="Next image"
            >
              &#8250;
            </button>
          )}
        </div>
      )}
    </div>
  );
}
frontend/src/experiments/components/viewers/PlotlyViewer.tsx ADDED
@@ -0,0 +1,422 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback } from "react";
2
+ import { HF_ORG } from "../../../config";
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Types
6
+ // ---------------------------------------------------------------------------
7
+
8
+ interface PlotlyViewerProps {
9
+ datasetRepo: string;
10
+ split?: string;
11
+ onClose: () => void;
12
+ }
13
+
14
+ interface HfRow {
15
+ [key: string]: unknown;
16
+ }
17
+
18
+ interface ColumnStats {
19
+ name: string;
20
+ min: number;
21
+ max: number;
22
+ mean: number;
23
+ count: number;
24
+ }
25
+
26
+ type ViewerMode = "plotly_json" | "numeric_summary" | "empty";
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Constants
30
+ // ---------------------------------------------------------------------------
31
+
32
+ const PLOTLY_COLUMN_NAMES = ["plotly_json", "chart_data", "figure_json"];
33
+ const HF_DATASETS_API = "https://datasets-server.huggingface.co";
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Helpers
37
+ // ---------------------------------------------------------------------------
38
+
39
/**
 * Find the column that holds a Plotly figure, if any.
 *
 * Candidates are taken from `PLOTLY_COLUMN_NAMES` in order; the first one that
 * is a key of `row` (per the `in` operator) wins.
 *
 * @param row A single dataset row.
 * @returns The matching column name, or `null` when none is present.
 */
function detectPlotlyColumn(row: HfRow): string | null {
  const match = PLOTLY_COLUMN_NAMES.find((candidate) => candidate in row);
  return match ?? null;
}
45
+
46
/**
 * Compute min / max / mean / count for every numeric column of a row set.
 *
 * Only the keys of the first row are inspected. Within a column, values that
 * are not finite numbers are ignored; a column with no finite numbers is
 * omitted from the result.
 *
 * @param rows Dataset rows.
 * @returns One entry per numeric column, in first-row key order (empty for no rows).
 */
function extractNumericColumns(rows: HfRow[]): ColumnStats[] {
  const firstRow = rows[0];
  if (firstRow === undefined) return [];

  const stats: ColumnStats[] = [];

  for (const name of Object.keys(firstRow)) {
    // Single pass per column. This avoids `Math.min(...values)`, which passes one
    // argument per row and throws a RangeError once the row count gets large.
    let count = 0;
    let sum = 0;
    let min = Infinity;
    let max = -Infinity;

    for (const row of rows) {
      const value = row[name];
      if (typeof value !== "number" || !Number.isFinite(value)) continue;
      count += 1;
      sum += value;
      if (value < min) min = value;
      if (value > max) max = value;
    }

    if (count === 0) continue;
    stats.push({ name, min, max, mean: sum / count, count });
  }

  return stats;
}
66
+
67
/**
 * Coerce a cell value into a JSON object.
 *
 * Non-null objects (arrays included) are returned unchanged. Strings are run
 * through `JSON.parse` and accepted only when the result is a non-null object.
 *
 * @param raw Cell value of unknown shape.
 * @returns The object, or `null` for anything else (including invalid JSON text).
 */
function tryParseJson(raw: unknown): object | null {
  if (raw !== null && typeof raw === "object") return raw;
  if (typeof raw !== "string") return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // not valid JSON
    return null;
  }

  return decoded !== null && typeof decoded === "object" ? decoded : null;
}
79
+
80
+ // ---------------------------------------------------------------------------
81
+ // Sub-components
82
+ // ---------------------------------------------------------------------------
83
+
84
/**
 * Button that copies `text` to the clipboard and shows "Copied!" for two seconds.
 *
 * Clipboard failures (API unavailable or permission denied) are ignored on
 * purpose: the label simply stays "Copy JSON".
 *
 * @param text The string to place on the clipboard.
 */
function CopyButton({ text }: { text: string }) {
  const [copied, setCopied] = useState(false);

  // Revert the label two seconds after a successful copy. Owning the timer in an
  // effect lets React clear it when the component unmounts, so the callback can
  // never fire against an unmounted component.
  useEffect(() => {
    if (!copied) return;
    const timer = setTimeout(() => setCopied(false), 2000);
    return () => clearTimeout(timer);
  }, [copied]);

  const handleCopy = async () => {
    try {
      await navigator.clipboard.writeText(text);
      setCopied(true);
    } catch {
      // clipboard not available
    }
  };

  return (
    <button
      onClick={handleCopy}
      className="text-xs bg-gray-700 hover:bg-gray-600 text-gray-300 hover:text-gray-100 px-2.5 py-1 rounded transition-colors border border-gray-600"
    >
      {copied ? "Copied!" : "Copy JSON"}
    </button>
  );
}
106
+
107
/**
 * Read-only view of a Plotly figure: an info bar (badge, detected chart type,
 * install hint, copy button) above the pretty-printed JSON.
 *
 * @param jsonData Parsed figure object.
 */
function PlotlyJsonView({ jsonData }: { jsonData: object }) {
  const formatted = JSON.stringify(jsonData, null, 2);

  // Chart type for the header badge: the figure's own `type` if truthy,
  // otherwise the `type` of the first entry in `data`.
  const figure = jsonData as Record<string, unknown>;
  const firstTrace = (jsonData as Record<string, unknown[]>).data?.[0] as
    | Record<string, unknown>
    | undefined;
  const chartType = figure.type || firstTrace?.type || null;

  const infoBar = (
    <div className="flex items-center justify-between gap-3 flex-shrink-0">
      <div className="flex items-center gap-2">
        <span className="text-xs text-amber-400 bg-amber-900/30 border border-amber-700/40 px-2 py-0.5 rounded">
          plotly JSON
        </span>
        {chartType && (
          <span className="text-xs text-gray-400">
            type:{" "}
            <span className="text-gray-300 font-mono">{String(chartType)}</span>
          </span>
        )}
        <span className="text-xs text-gray-500 italic">
          Install{" "}
          <code className="text-gray-400 bg-gray-800 px-1 py-0.5 rounded text-[11px]">react-plotly.js</code>{" "}
          for interactive rendering
        </span>
      </div>
      <CopyButton text={formatted} />
    </div>
  );

  return (
    <div className="flex flex-col h-full gap-3">
      {/* Info bar */}
      {infoBar}

      {/* JSON code block */}
      <div className="flex-1 overflow-auto rounded-lg border border-gray-700 bg-gray-950">
        <pre className="p-4 text-xs text-gray-300 font-mono leading-relaxed whitespace-pre-wrap break-words">
          {formatted}
        </pre>
      </div>
    </div>
  );
}
156
+
157
/**
 * Fallback view used when no Plotly JSON column is available: a table with
 * count / min / max / mean for each numeric column.
 *
 * @param stats Per-column statistics; an empty list renders a placeholder message.
 */
function NumericSummaryView({ stats }: { stats: ColumnStats[] }) {
  if (stats.length === 0) {
    return (
      <div className="flex items-center justify-center h-32">
        <p className="text-sm text-gray-500 italic">
          No numeric columns found in dataset.
        </p>
      </div>
    );
  }

  // Header cells: the name column is left-aligned, the numeric ones right-aligned.
  const leftHeadClass = "text-left py-2 px-3 text-xs text-gray-400 uppercase tracking-wide font-medium";
  const rightHeadClass = "text-right py-2 px-3 text-xs text-gray-400 uppercase tracking-wide font-medium";
  const headers = [
    { label: "Column", className: leftHeadClass },
    { label: "Count", className: rightHeadClass },
    { label: "Min", className: rightHeadClass },
    { label: "Max", className: rightHeadClass },
    { label: "Mean", className: rightHeadClass },
  ];

  // Min / max / mean share one number format (at most four fraction digits).
  const formatStat = (value: number) =>
    value.toLocaleString(undefined, {
      maximumFractionDigits: 4,
    });

  return (
    <div className="flex flex-col gap-3">
      <p className="text-xs text-gray-500">
        No Plotly JSON column detected (
        <code className="text-gray-400">
          {PLOTLY_COLUMN_NAMES.join(", ")}
        </code>
        ). Showing numeric column summary.
      </p>
      <div className="overflow-x-auto rounded-lg border border-gray-700">
        <table className="w-full text-sm">
          <thead>
            <tr className="border-b border-gray-700 bg-gray-800/50">
              {headers.map((header) => (
                <th key={header.label} className={header.className}>
                  {header.label}
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {stats.map((col) => (
              <tr
                key={col.name}
                className="border-b border-gray-800 hover:bg-gray-800/30 transition-colors"
              >
                <td className="py-2 px-3 font-mono text-xs text-cyan-300">
                  {col.name}
                </td>
                <td className="py-2 px-3 text-right text-xs text-gray-400 font-mono">
                  {col.count}
                </td>
                {[col.min, col.max, col.mean].map((value, statIdx) => (
                  <td
                    key={statIdx}
                    className="py-2 px-3 text-right text-xs text-gray-300 font-mono"
                  >
                    {formatStat(value)}
                  </td>
                ))}
              </tr>
            ))}
          </tbody>
        </table>
      </div>
    </div>
  );
}
233
+
234
+ // ---------------------------------------------------------------------------
235
+ // Main Component
236
+ // ---------------------------------------------------------------------------
237
+
238
/**
 * Modal viewer for a dataset expected to carry a Plotly figure.
 *
 * Fetches up to 100 rows from the datasets-server `/rows` endpoint. If the first
 * row has one of the `PLOTLY_COLUMN_NAMES` columns and its value parses as a
 * JSON object, that figure's JSON is shown; otherwise a numeric summary of the
 * fetched rows is shown, or an "empty" notice when there is nothing to render.
 *
 * @param datasetRepo Dataset id; `HF_ORG/` is prepended when it has no org prefix.
 * @param split       Split to read (defaults to "train").
 * @param onClose     Invoked when the close button is clicked.
 */
export default function PlotlyViewer({
  datasetRepo,
  split = "train",
  onClose,
}: PlotlyViewerProps) {
  // Ensure dataset repo has org prefix for HF API calls
  const fullRepo = datasetRepo.includes("/") ? datasetRepo : `${HF_ORG}/${datasetRepo}`;

  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  const [mode, setMode] = useState<ViewerMode>("empty");
  const [plotlyJson, setPlotlyJson] = useState<object | null>(null);
  const [numericStats, setNumericStats] = useState<ColumnStats[]>([]);
  const [rowCount, setRowCount] = useState<number>(0);

  // Incremented by the Retry button to re-run the loading effect for the same repo/split.
  const [reloadToken, setReloadToken] = useState(0);

  const shortName = datasetRepo.split("/").pop() ?? datasetRepo;

  useEffect(() => {
    // Set by the cleanup so a response that arrives after the props changed
    // (or after unmount) cannot overwrite newer state.
    let cancelled = false;

    async function load(): Promise<void> {
      // Drop results of the previous load so a failed or empty reload does not
      // leave a stale row count / figure from another dataset on screen.
      setLoading(true);
      setError(null);
      setMode("empty");
      setPlotlyJson(null);
      setNumericStats([]);
      setRowCount(0);

      try {
        // Fetch first page of rows (up to 100 for numeric summary)
        const url = `${HF_DATASETS_API}/rows?dataset=${encodeURIComponent(
          fullRepo
        )}&config=default&split=${encodeURIComponent(split)}&offset=0&length=100`;

        const resp = await fetch(url);
        if (!resp.ok) {
          const text = await resp.text();
          throw new Error(`HF API error ${resp.status}: ${text.slice(0, 200)}`);
        }

        const data = (await resp.json()) as {
          rows?: { row: HfRow }[];
          num_rows_total?: number;
        };
        if (cancelled) return;

        const rows: HfRow[] = (data.rows ?? []).map((r) => r.row);
        setRowCount(data.num_rows_total ?? rows.length);

        if (rows.length === 0) {
          setMode("empty");
          return;
        }

        // Try to find a plotly JSON column
        const plotlyCol = detectPlotlyColumn(rows[0]);
        if (plotlyCol !== null) {
          // Use the first row's plotly JSON
          const parsed = tryParseJson(rows[0][plotlyCol]);
          if (parsed !== null) {
            setPlotlyJson(parsed);
            setMode("plotly_json");
            return;
          }
        }

        // Fallback: numeric summary
        const stats = extractNumericColumns(rows);
        setNumericStats(stats);
        setMode(stats.length > 0 ? "numeric_summary" : "empty");
      } catch (err) {
        if (!cancelled) {
          setError(err instanceof Error ? err.message : String(err));
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    }

    void load();
    return () => {
      cancelled = true;
    };
  }, [fullRepo, split, reloadToken]);

  // Retry re-triggers the effect above instead of starting an untracked fetch.
  const fetchData = useCallback(() => setReloadToken((token) => token + 1), []);

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black/60 backdrop-blur-sm">
      <div className="w-full max-w-5xl h-[85vh] bg-gray-900 rounded-xl border border-gray-700 shadow-2xl flex flex-col overflow-hidden">
        {/* Header */}
        <div className="flex items-center justify-between px-5 py-3 border-b border-gray-700 flex-shrink-0">
          <div className="flex items-center gap-3 min-w-0">
            <span className="text-sm font-semibold text-gray-200 truncate">
              {shortName}
            </span>
            {datasetRepo.includes("/") && (
              <span className="text-xs text-gray-500 truncate hidden sm:block">
                {datasetRepo.split("/")[0]}/
              </span>
            )}
            <span className="text-xs text-gray-600 border border-gray-700 px-1.5 py-0.5 rounded">
              {split}
            </span>
            {!loading && rowCount > 0 && (
              <span className="text-xs text-gray-600">
                {rowCount.toLocaleString()} rows
              </span>
            )}
          </div>

          <div className="flex items-center gap-2 flex-shrink-0">
            <a
              href={`https://huggingface.co/datasets/${fullRepo}`}
              target="_blank"
              rel="noopener noreferrer"
              className="text-xs text-gray-500 hover:text-cyan-400 transition-colors px-2 py-1 border border-gray-700 rounded"
            >
              HF
            </a>
            <button
              onClick={onClose}
              className="text-gray-400 hover:text-gray-200 transition-colors p-1 rounded hover:bg-gray-700"
              aria-label="Close viewer"
            >
              <svg
                xmlns="http://www.w3.org/2000/svg"
                className="h-4 w-4"
                fill="none"
                viewBox="0 0 24 24"
                stroke="currentColor"
                strokeWidth={2}
              >
                <path
                  strokeLinecap="round"
                  strokeLinejoin="round"
                  d="M6 18L18 6M6 6l12 12"
                />
              </svg>
            </button>
          </div>
        </div>

        {/* Body */}
        <div className="flex-1 overflow-hidden p-5">
          {loading && (
            <div className="flex items-center justify-center h-full">
              <div className="flex flex-col items-center gap-3">
                <div className="w-6 h-6 border-2 border-cyan-500 border-t-transparent rounded-full animate-spin" />
                <p className="text-sm text-gray-400">Loading dataset...</p>
              </div>
            </div>
          )}

          {!loading && error && (
            <div className="flex items-center justify-center h-full">
              <div className="max-w-lg text-center space-y-3">
                <p className="text-sm font-medium text-red-400">
                  Failed to load dataset
                </p>
                <p className="text-xs text-gray-500 font-mono break-words bg-gray-800 rounded p-3">
                  {error}
                </p>
                <button
                  onClick={fetchData}
                  className="text-xs text-cyan-400 hover:text-cyan-300 border border-cyan-700/50 px-3 py-1 rounded transition-colors"
                >
                  Retry
                </button>
              </div>
            </div>
          )}

          {!loading && !error && mode === "empty" && (
            <div className="flex items-center justify-center h-full">
              <div className="text-center space-y-2">
                <p className="text-sm text-gray-400">Dataset is empty or has no renderable columns.</p>
                <p className="text-xs text-gray-600">
                  Expected columns for Plotly:{" "}
                  <code className="text-gray-500">
                    {PLOTLY_COLUMN_NAMES.join(", ")}
                  </code>
                </p>
              </div>
            </div>
          )}

          {!loading && !error && mode === "plotly_json" && plotlyJson && (
            <PlotlyJsonView jsonData={plotlyJson} />
          )}

          {!loading && !error && mode === "numeric_summary" && (
            <NumericSummaryView stats={numericStats} />
          )}
        </div>
      </div>
    </div>
  );
}
frontend/src/experiments/components/viewers/TableViewer.tsx ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback, useRef } from "react";
2
+ import { HF_ORG } from "../../../config";
3
+
4
+ const PAGE_SIZE = 100;
5
+ const CELL_TRUNCATE_LEN = 200;
6
+
7
/** A single row entry from the datasets-server `/rows` response. */
interface HfRow {
  /** Index of the row as reported by the API. */
  row_idx: number;
  /** Cell values of the row, keyed by column name. */
  row: Record<string, unknown>;
}

/** Description of one column (feature) in the `/rows` response. */
interface HfFeature {
  /** Position of the feature as reported by the API. */
  feature_idx: number;
  /** Column name; used as the key into `HfRow.row`. */
  name: string;
  /** Feature type descriptor; not inspected by this viewer. */
  type: Record<string, unknown>;
}

/** The parts of the `/rows` response body that this viewer reads. */
interface HfResponse {
  /** Rows of the requested page. */
  rows: HfRow[];
  /** Column descriptors for the rows. */
  features: HfFeature[];
  /** Total row count reported by the API. */
  num_rows_total: number;
}
23
+
24
/** Props accepted by the `TableViewer` component. */
interface TableViewerProps {
  /** Dataset repo id; `HF_ORG` is prepended when it contains no "/". */
  datasetRepo: string;
  /** Dataset split to read; the component defaults it to "train". */
  split?: string;
  /** Called when the user closes the viewer. */
  onClose: () => void;
}
29
+
30
+ // ─── Cell Expansion Modal ─────────────────────────────────────────────────────
31
+
32
/** Props accepted by the `CellModal` component. */
interface CellModalProps {
  /** Complete, untruncated cell text to display and copy. */
  value: string;
  /** Name of the column the cell belongs to; shown in the header. */
  colName: string;
  /** Called when the modal should close. */
  onClose: () => void;
}
37
+
38
/**
 * Modal overlay that shows the complete text of a single table cell.
 *
 * The modal closes on Escape, on a click on the dimmed backdrop, or via the
 * × button. The "Copy" button writes `value` to the clipboard and shows
 * "Copied!" for two seconds.
 */
function CellModal({ value, colName, onClose }: CellModalProps) {
  const [copied, setCopied] = useState(false);
  // Handle of the pending "Copied!" reset timer, kept so it can be cancelled.
  const copiedTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  // Cancel a pending reset when the modal unmounts so the timer never fires
  // against an unmounted component.
  useEffect(() => {
    return () => {
      if (copiedTimerRef.current !== null) clearTimeout(copiedTimerRef.current);
    };
  }, []);

  const handleCopy = useCallback(async () => {
    try {
      await navigator.clipboard.writeText(value);
      setCopied(true);
      // Restart the two-second window on every successful copy; otherwise an
      // earlier timer would reset the label too soon.
      if (copiedTimerRef.current !== null) clearTimeout(copiedTimerRef.current);
      copiedTimerRef.current = setTimeout(() => {
        copiedTimerRef.current = null;
        setCopied(false);
      }, 2000);
    } catch {
      // Copying is best-effort: if the clipboard write is rejected the label
      // simply stays "Copy".
    }
  }, [value]);

  // Close on Escape
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key === "Escape") onClose();
    };
    window.addEventListener("keydown", handler);
    return () => window.removeEventListener("keydown", handler);
  }, [onClose]);

  return (
    <div
      className="fixed inset-0 z-50 flex items-center justify-center bg-black/80 p-4"
      onClick={(e) => {
        // Only a click on the backdrop itself closes the modal; clicks inside
        // the panel have a different target and are ignored here.
        if (e.target === e.currentTarget) onClose();
      }}
    >
      <div className="bg-gray-900 border border-gray-700 rounded-lg w-full max-w-4xl max-h-[90vh] flex flex-col shadow-2xl">
        {/* Header */}
        <div className="flex items-center justify-between px-4 py-3 border-b border-gray-700 flex-shrink-0">
          <span className="text-sm font-medium text-gray-200 truncate max-w-[80%]">
            {colName}
          </span>
          <div className="flex items-center gap-2">
            <button
              onClick={handleCopy}
              className="text-xs px-3 py-1 rounded bg-gray-700 hover:bg-gray-600 text-gray-300 transition-colors border border-gray-600"
              title="Copy to clipboard"
            >
              {copied ? "Copied!" : "Copy"}
            </button>
            <button
              onClick={onClose}
              className="text-gray-400 hover:text-gray-200 transition-colors text-lg leading-none px-1"
              title="Close (Esc)"
            >
              ×
            </button>
          </div>
        </div>

        {/* Full value — scrollable, monospace */}
        <div className="flex-1 overflow-auto p-4">
          <pre className="font-mono text-sm text-gray-100 whitespace-pre-wrap break-words leading-relaxed">
            {value}
          </pre>
        </div>

        {/* Footer with char count */}
        <div className="px-4 py-2 border-t border-gray-700 flex-shrink-0">
          <span className="text-xs text-gray-500">{value.length.toLocaleString()} characters — complete, untruncated</span>
        </div>
      </div>
    </div>
  );
}
106
+
107
+ // ─── Table Cell ───────────────────────────────────────────────────────────────
108
+
109
/** Props accepted by the `TableCell` component. */
interface CellProps {
  /** Raw cell value taken from the row. */
  value: unknown;
  /** Name of the column this cell belongs to. */
  colName: string;
  /** Called with the full cell text and column name when "Show more" is clicked. */
  onExpand: (value: string, colName: string) => void;
}
114
+
115
/**
 * Renders one `<td>` for a dataset cell.
 *
 * The value is turned into text (empty for null/undefined, pretty-printed JSON
 * for objects, `String(value)` otherwise). Text longer than
 * `CELL_TRUNCATE_LEN` is cut and followed by a "Show more" button that hands
 * the full text to `onExpand`.
 */
function TableCell({ value, colName, onExpand }: CellProps) {
  let fullText: string;
  if (value == null) {
    fullText = "";
  } else if (typeof value === "object") {
    fullText = JSON.stringify(value, null, 2);
  } else {
    fullText = String(value);
  }

  const tooLong = fullText.length > CELL_TRUNCATE_LEN;
  const shownText = tooLong ? `${fullText.slice(0, CELL_TRUNCATE_LEN)}...` : fullText;

  const expandButton = tooLong ? (
    <button
      onClick={() => onExpand(fullText, colName)}
      className="ml-1 text-cyan-400 hover:text-cyan-300 text-xs underline underline-offset-2 transition-colors whitespace-nowrap"
      title="Show complete value"
    >
      Show more
    </button>
  ) : null;

  return (
    <td className="px-3 py-2 text-xs text-gray-300 max-w-xs border-b border-gray-800 align-top">
      <span className="whitespace-pre-wrap break-words">{shownText}</span>
      {expandButton}
    </td>
  );
}
140
+
141
+ // ─── Sort icon ────────────────────────────────────────────────────────────────
142
+
143
/**
 * Small arrow shown next to a column header: a gray "⇅" when the column is
 * not the active sort column (`dir` is null), otherwise a cyan "↑" or "↓".
 */
function SortIcon({ dir }: { dir: "asc" | "desc" | null }) {
  const tone = dir === null ? "text-gray-600" : "text-cyan-400";
  const glyph = dir === null ? "⇅" : dir === "asc" ? "↑" : "↓";
  return <span className={`${tone} ml-1 text-xs`}>{glyph}</span>;
}
148
+
149
+ // ─── Main Component ───────────────────────────────────────────────────────────
150
+
151
/**
 * Full-screen table viewer for a Hugging Face dataset split.
 *
 * Rows are fetched one page (`PAGE_SIZE` rows) at a time from the
 * datasets-server `/rows` endpoint. Search and sort are applied client-side to
 * the currently loaded page only. Cells longer than `CELL_TRUNCATE_LEN` can be
 * expanded into a `CellModal`.
 *
 * @param datasetRepo Dataset repo id; `HF_ORG` is prepended when it has no "/".
 * @param split Dataset split to read (defaults to "train").
 * @param onClose Called when the user clicks the close button.
 */
export default function TableViewer({ datasetRepo, split = "train", onClose }: TableViewerProps) {
  // Ensure dataset repo has org prefix for HF API calls
  const fullRepo = datasetRepo.includes("/") ? datasetRepo : `${HF_ORG}/${datasetRepo}`;

  const [rows, setRows] = useState<HfRow[]>([]);
  const [columns, setColumns] = useState<string[]>([]);
  const [totalRows, setTotalRows] = useState<number | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [page, setPage] = useState(0);

  const [searchQuery, setSearchQuery] = useState("");
  const [sortCol, setSortCol] = useState<string | null>(null);
  const [sortDir, setSortDir] = useState<"asc" | "desc">("asc");

  const [expandedCell, setExpandedCell] = useState<{ value: string; colName: string } | null>(null);

  // Monotonic request counter: only the most recent fetch may write to state.
  const fetchRef = useRef(0);

  const fetchRows = useCallback(async (pageIndex: number) => {
    setLoading(true);
    setError(null);

    const fetchId = ++fetchRef.current;
    const offset = pageIndex * PAGE_SIZE;
    const baseUrl = "https://datasets-server.huggingface.co/rows";
    // Both user-supplied values are URL-encoded so characters such as "&" or
    // "#" in a split name cannot corrupt the query string.
    const datasetParam = encodeURIComponent(fullRepo);
    const paging = `split=${encodeURIComponent(split)}&offset=${offset}&length=${PAGE_SIZE}`;
    const urlWithConfig = `${baseUrl}?dataset=${datasetParam}&config=default&${paging}`;
    const urlWithoutConfig = `${baseUrl}?dataset=${datasetParam}&${paging}`;

    let data: HfResponse | null = null;

    try {
      const res = await fetch(urlWithConfig);
      if (res.ok) {
        data = await res.json() as HfResponse;
      } else {
        // Try without config param
        const res2 = await fetch(urlWithoutConfig);
        if (res2.ok) {
          data = await res2.json() as HfResponse;
        } else {
          const errText = await res2.text();
          throw new Error(`API error ${res2.status}: ${errText.slice(0, 200)}`);
        }
      }
    } catch (e) {
      // Ignore failures of requests that have since been superseded.
      if (fetchRef.current === fetchId) {
        setError(e instanceof Error ? e.message : "Failed to fetch dataset rows");
        setLoading(false);
      }
      return;
    }

    // A newer request was started while this one was in flight; drop the result.
    if (fetchRef.current !== fetchId) return;

    if (data) {
      const cols = data.features.map((f) => f.name);
      setColumns(cols);
      setRows(data.rows);
      setTotalRows(data.num_rows_total);
    }
    setLoading(false);
  }, [fullRepo, split]);

  useEffect(() => {
    fetchRows(page);
  }, [fetchRows, page]);

  // ── Derived: search + sort applied to currently loaded page ──
  const filteredRows = (() => {
    let result = rows;

    if (searchQuery.trim()) {
      const q = searchQuery.toLowerCase();
      result = result.filter((r) =>
        columns.some((col) => {
          const v = r.row[col];
          if (v === null || v === undefined) return false;
          return String(typeof v === "object" ? JSON.stringify(v) : v).toLowerCase().includes(q);
        })
      );
    }

    if (sortCol) {
      // Text form of a cell used for comparison (empty for null/undefined).
      const toText = (v: unknown): string =>
        v === null || v === undefined ? "" : typeof v === "object" ? JSON.stringify(v) : String(v);
      // Number("") is 0, so blank cells must be excluded explicitly or they
      // would be sorted as if they held the value zero.
      const toNumber = (text: string): number => (text.trim() === "" ? NaN : Number(text));

      result = [...result].sort((a, b) => {
        const aText = toText(a.row[sortCol]);
        const bText = toText(b.row[sortCol]);
        // Try numeric sort first
        const aNum = toNumber(aText);
        const bNum = toNumber(bText);
        if (!isNaN(aNum) && !isNaN(bNum)) {
          return sortDir === "asc" ? aNum - bNum : bNum - aNum;
        }
        return sortDir === "asc" ? aText.localeCompare(bText) : bText.localeCompare(aText);
      });
    }

    return result;
  })();

  // Header click cycles a column through: ascending → descending → unsorted.
  const handleSort = (col: string) => {
    if (sortCol === col) {
      if (sortDir === "asc") {
        setSortDir("desc");
      } else {
        setSortCol(null);
        setSortDir("asc");
      }
    } else {
      setSortCol(col);
      setSortDir("asc");
    }
  };

  const handleExpand = (value: string, colName: string) => {
    setExpandedCell({ value, colName });
  };

  const startRow = page * PAGE_SIZE + 1;
  const endRow = totalRows !== null ? Math.min((page + 1) * PAGE_SIZE, totalRows) : (page + 1) * PAGE_SIZE;
  const hasPrev = page > 0;
  const hasNext = totalRows !== null ? (page + 1) * PAGE_SIZE < totalRows : rows.length === PAGE_SIZE;
  // At least one page is always reported, even for an empty dataset.
  const totalPages = totalRows !== null ? Math.max(1, Math.ceil(totalRows / PAGE_SIZE)) : 1;

  const shortName = datasetRepo.split("/").pop() ?? datasetRepo;
  // Taken from the prefixed id so that a bare dataset name is not repeated as
  // its own organisation.
  const orgName = fullRepo.split("/")[0];

  return (
    <>
      {/* Full-screen overlay */}
      <div className="fixed inset-0 z-40 bg-gray-950 flex flex-col">
        {/* ── Top bar ────────────────────────────────────────────────── */}
        <div className="flex items-center justify-between px-4 py-3 border-b border-gray-800 flex-shrink-0 bg-gray-900">
          <div className="flex items-center gap-3 min-w-0">
            <span className="text-xs text-gray-500 uppercase tracking-wider font-medium">Table Viewer</span>
            <span className="text-gray-600">·</span>
            <span className="text-sm font-medium text-gray-200 truncate" title={datasetRepo}>
              {shortName}
            </span>
            <span className="text-xs text-gray-500 truncate hidden sm:block" title={fullRepo}>
              ({orgName})
            </span>
            {totalRows !== null && (
              <span className="text-xs text-gray-500 flex-shrink-0">
                {totalRows.toLocaleString()} rows
              </span>
            )}
          </div>

          <div className="flex items-center gap-3 flex-shrink-0">
            {/* Search */}
            <input
              type="text"
              placeholder="Search all cells…"
              value={searchQuery}
              onChange={(e) => setSearchQuery(e.target.value)}
              className="text-xs bg-gray-800 border border-gray-700 rounded px-2.5 py-1.5 text-gray-200 placeholder-gray-500 focus:outline-none focus:border-cyan-600 w-48 sm:w-64"
            />

            {/* Close */}
            <button
              onClick={onClose}
              className="text-gray-400 hover:text-gray-200 transition-colors text-xl leading-none px-1"
              title="Close viewer"
            >
              ×
            </button>
          </div>
        </div>

        {/* ── Body ─────────────────────────────────────────────────────── */}
        <div className="flex-1 overflow-auto">
          {loading && (
            <div className="flex items-center justify-center h-32">
              <div className="flex items-center gap-3 text-gray-400">
                <svg className="animate-spin h-5 w-5" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
                  <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" />
                  <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z" />
                </svg>
                <span className="text-sm">Loading rows…</span>
              </div>
            </div>
          )}

          {error && !loading && (
            <div className="flex flex-col items-center justify-center h-32 gap-3">
              <p className="text-sm text-red-400">{error}</p>
              <button
                onClick={() => fetchRows(page)}
                className="text-xs px-3 py-1.5 rounded bg-gray-700 hover:bg-gray-600 text-gray-300 border border-gray-600 transition-colors"
              >
                Retry
              </button>
            </div>
          )}

          {!loading && !error && filteredRows.length === 0 && (
            <div className="flex items-center justify-center h-32">
              <p className="text-sm text-gray-500 italic">
                {searchQuery.trim() ? "No rows match your search." : "No rows returned."}
              </p>
            </div>
          )}

          {!loading && !error && filteredRows.length > 0 && (
            <table className="w-full border-collapse text-left table-auto">
              <thead className="sticky top-0 z-10 bg-gray-900">
                <tr>
                  {/* Row index column */}
                  <th className="px-3 py-2.5 text-xs font-medium text-gray-500 border-b border-gray-700 w-12 bg-gray-900">
                    #
                  </th>
                  {columns.map((col) => (
                    <th
                      key={col}
                      onClick={() => handleSort(col)}
                      className="px-3 py-2.5 text-xs font-medium text-gray-300 border-b border-gray-700 cursor-pointer select-none hover:text-gray-100 hover:bg-gray-800 transition-colors whitespace-nowrap bg-gray-900"
                    >
                      {col}
                      <SortIcon dir={sortCol === col ? sortDir : null} />
                    </th>
                  ))}
                </tr>
              </thead>
              <tbody>
                {filteredRows.map((r) => (
                  <tr
                    key={r.row_idx}
                    className="hover:bg-gray-800/40 transition-colors"
                  >
                    <td className="px-3 py-2 text-xs text-gray-600 border-b border-gray-800 align-top tabular-nums">
                      {r.row_idx}
                    </td>
                    {columns.map((col) => (
                      <TableCell
                        key={col}
                        value={r.row[col]}
                        colName={col}
                        onExpand={handleExpand}
                      />
                    ))}
                  </tr>
                ))}
              </tbody>
            </table>
          )}
        </div>

        {/* ── Pagination bar ───────────────────────────────────────────── */}
        {!loading && !error && totalRows !== null && (
          <div className="flex items-center justify-between px-4 py-2.5 border-t border-gray-800 bg-gray-900 flex-shrink-0">
            <span className="text-xs text-gray-500">
              {searchQuery.trim()
                ? `${filteredRows.length} matching rows on this page`
                : `Rows ${startRow}–${endRow} of ${totalRows.toLocaleString()}`}
            </span>
            <div className="flex items-center gap-2">
              <button
                onClick={() => setPage((p) => p - 1)}
                disabled={!hasPrev}
                className="text-xs px-3 py-1 rounded bg-gray-800 hover:bg-gray-700 text-gray-300 border border-gray-700 transition-colors disabled:opacity-40 disabled:cursor-not-allowed"
              >
                ← Prev
              </button>
              <span className="text-xs text-gray-500 tabular-nums">
                Page {page + 1} / {totalPages}
              </span>
              <button
                onClick={() => setPage((p) => p + 1)}
                disabled={!hasNext}
                className="text-xs px-3 py-1 rounded bg-gray-800 hover:bg-gray-700 text-gray-300 border border-gray-700 transition-colors disabled:opacity-40 disabled:cursor-not-allowed"
              >
                Next →
              </button>
            </div>
          </div>
        )}
      </div>

      {/* ── Cell Expansion Modal ─────────────────────────────────────── */}
      {expandedCell && (
        <CellModal
          value={expandedCell.value}
          colName={expandedCell.colName}
          onClose={() => setExpandedCell(null)}
        />
      )}
    </>
  );
}
frontend/src/experiments/components/viewers/YamlViewer.tsx ADDED
@@ -0,0 +1,588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback, useRef } from "react";
2
+ import { HF_ORG } from "../../../config";
3
+
4
+ // ─── Types ────────────────────────────────────────────────────────────────────
5
+
6
/** A single row entry from the datasets-server `/rows` response. */
interface HfRow {
  /** Index of the row as reported by the API. */
  row_idx: number;
  /** Cell values of the row, keyed by column name. */
  row: Record<string, unknown>;
}

/** Description of one column (feature) in the `/rows` response. */
interface HfFeature {
  /** Position of the feature as reported by the API. */
  feature_idx: number;
  /** Column name. */
  name: string;
  /** Feature type descriptor; not inspected by this viewer. */
  type: Record<string, unknown>;
}

/** The parts of the `/rows` response body that this viewer declares. */
interface HfResponse {
  /** Rows returned for the request. */
  rows: HfRow[];
  /** Column descriptors for the rows. */
  features: HfFeature[];
  /** Total row count reported by the API. */
  num_rows_total: number;
}
22
+
23
/** Props accepted by the `YamlViewer` component. */
interface YamlViewerProps {
  /** Dataset repo id; `HF_ORG` is prepended when it contains no "/". */
  datasetRepo: string;
  /** Dataset split to read; the component defaults it to "train". */
  split?: string;
  /** Called when the viewer should close (close button or Escape). */
  onClose: () => void;
}
28
+
29
+ // ─── Syntax Highlighting ──────────────────────────────────────────────────────
30
+
31
/**
 * Maps a JSON value to the Tailwind text-colour class used for it in the
 * dark theme: null/undefined → gray-400, booleans → purple, numbers → amber,
 * strings → green, anything else → gray-200.
 */
function getValueColor(value: unknown): string {
  // Handled before the switch because `typeof null` is "object".
  if (value == null) return "text-gray-400";

  switch (typeof value) {
    case "boolean":
      return "text-purple-400";
    case "number":
      return "text-amber-400";
    case "string":
      return "text-green-400";
    default:
      return "text-gray-200";
  }
}
42
+
43
/** Renders a string value in green, wrapped in double quotes. */
function StringValue({ value }: { value: string }) {
  const quote = '"';
  return (
    <span className="text-green-400">
      {quote}
      {value}
      {quote}
    </span>
  );
}
47
+
48
/**
 * Renders a non-container JSON value: strings go through `StringValue`,
 * everything else is shown as its `String(...)` form ("null" for null) in the
 * colour chosen by `getValueColor`.
 */
function PrimitiveValue({ value }: { value: unknown }) {
  if (typeof value === "string") {
    return <StringValue value={value} />;
  }

  const label = value === null ? "null" : String(value);
  return <span className={getValueColor(value)}>{label}</span>;
}
57
+
58
+ // ─── Recursive JSON Node ──────────────────────────────────────────────────────
59
+
60
/** Props accepted by the recursive `JsonNode` component. */
interface JsonNodeProps {
  /** The value to render: object, array, or primitive. */
  data: unknown;
  /** Nesting level; determines the indent prefix. Defaults to 0. */
  depth?: number;
  /** When set, rendered as a quoted key followed by ": " before the value. */
  keyName?: string;
  /** True when this node is the last sibling, i.e. gets no trailing comma. */
  isLast?: boolean;
  /** Externally controlled collapse state for this node. */
  collapsed?: boolean;
  /** Handler that flips the controlled collapse state. */
  onToggleCollapse?: () => void;
}
71
+
72
/**
 * Recursively renders a JSON value as indented, syntax-coloured lines.
 *
 * Objects and arrays print their opening bracket, one child `JsonNode` per
 * entry, and the closing bracket; empty containers print `{}` / `[]` on a
 * single line; everything else is rendered through `PrimitiveValue`.
 *
 * A container becomes collapsible when the parent passes `onToggleCollapse`:
 * its header line is then clickable, shows a chevron, and `collapsed` decides
 * whether the children or a " … " placeholder are shown. Child nodes created
 * here never receive a toggle handler, so only nodes rendered directly by the
 * caller can be collapsed.
 */
function JsonNode({
  data,
  depth = 0,
  keyName,
  isLast = true,
  collapsed,
  onToggleCollapse,
}: JsonNodeProps) {
  const indent = " ".repeat(depth);
  // Collapsibility depends only on whether a toggle handler was supplied.
  // The earlier check also required depth === 0, but JsonRoot renders its
  // collapsible entries at depth 1, so the handler was never attached.
  const isCollapsible = onToggleCollapse !== undefined;

  const indentEl = <span className="text-gray-600">{indent}</span>;

  // Render the key label (if any)
  const keyEl = keyName !== undefined ? (
    <span className="text-cyan-400">&quot;{keyName}&quot;</span>
  ) : null;

  const separator = keyName !== undefined ? (
    <span className="text-gray-400">: </span>
  ) : null;

  const comma = !isLast ? <span className="text-gray-500">,</span> : null;

  // ── Primitive ─────────────────────────────────────────────────────────────
  if (data === null || typeof data !== "object") {
    return (
      <div>
        {indentEl}
        {keyEl}{separator}
        <PrimitiveValue value={data} />
        {comma}
      </div>
    );
  }

  // ── Object / Array ────────────────────────────────────────────────────────
  // Both container kinds share one layout; only the brackets and the way the
  // children are keyed differ.
  const isArray = Array.isArray(data);
  const open = isArray ? "[" : "{";
  const close = isArray ? "]" : "}";

  const children = Array.isArray(data)
    ? data.map((item: unknown, i: number, all: unknown[]) => (
        <JsonNode
          key={i}
          data={item}
          depth={depth + 1}
          isLast={i === all.length - 1}
        />
      ))
    : Object.entries(data as Record<string, unknown>).map(([k, v], i, all) => (
        <JsonNode
          key={k}
          data={v}
          depth={depth + 1}
          keyName={k}
          isLast={i === all.length - 1}
        />
      ));

  if (children.length === 0) {
    return (
      <div>
        {indentEl}
        {keyEl}{separator}
        <span className="text-gray-200">{open + close}</span>
        {comma}
      </div>
    );
  }

  const isCollapsed = collapsed === true;

  return (
    <div>
      {/* Header line: indent, optional chevron, key and opening bracket */}
      <div
        className={isCollapsible ? "cursor-pointer group" : ""}
        onClick={isCollapsible ? onToggleCollapse : undefined}
      >
        {indentEl}
        {isCollapsible && (
          <span className={`text-gray-500 mr-1 text-xs transition-transform inline-block ${isCollapsed ? "" : "rotate-90"} group-hover:text-gray-300`}>
            ▶
          </span>
        )}
        {keyEl}{separator}
        <span className="text-gray-200">{open}</span>
        {isCollapsed && (
          <>
            <span className="text-gray-500"> … </span>
            <span className="text-gray-200">{close}</span>
            {comma}
          </>
        )}
      </div>

      {!isCollapsed && (
        <>
          {children}
          <div>
            {indentEl}
            <span className="text-gray-200">{close}</span>
            {comma}
          </div>
        </>
      )}
    </div>
  );
}
227
+
228
+ // ─── Root Object Renderer (top-level collapsible sections) ───────────────────
229
+
230
/**
 * Renders the root of a JSON document.
 *
 * For a non-empty object each top-level key is rendered as a `JsonNode` with
 * its own collapse state and toggle handler. null/undefined render as "null",
 * an empty object as `{}`, and arrays or primitives are delegated to a plain
 * `JsonNode`.
 */
function JsonRoot({ data }: { data: unknown }) {
  // Collapse flag per top-level key; a missing key counts as expanded.
  const [collapsedByKey, setCollapsedByKey] = useState<Record<string, boolean>>({});

  const flip = useCallback((key: string) => {
    setCollapsedByKey((current) => ({ ...current, [key]: !current[key] }));
  }, []);

  if (data == null) {
    return <span className="text-gray-400">null</span>;
  }

  // Top-level array or primitive — render normally
  if (typeof data !== "object" || Array.isArray(data)) {
    return <JsonNode data={data} depth={0} />;
  }

  // Top-level object: render each key as a collapsible section
  const pairs = Object.entries(data as Record<string, unknown>);
  if (pairs.length === 0) {
    return <span className="text-gray-200">{"{}"}</span>;
  }

  const lastIndex = pairs.length - 1;
  const sections = pairs.map(([name, child], index) => (
    <JsonNode
      key={name}
      data={child}
      depth={1}
      keyName={name}
      isLast={index === lastIndex}
      collapsed={collapsedByKey[name] ?? false}
      onToggleCollapse={() => flip(name)}
    />
  ));

  return (
    <div>
      <div><span className="text-gray-200">{"{"}</span></div>
      {sections}
      <div><span className="text-gray-200">{"}"}</span></div>
    </div>
  );
}
270
+
271
+ // ─── Row Selector ─────────────────────────────────────────────────────────────
272
+
273
/** Props accepted by the `RowSelector` component. */
interface RowSelectorProps {
  /** Rows the user can choose between. */
  rows: HfRow[];
  /** Position within `rows` of the currently selected row. */
  currentIdx: number;
  /** Called with the position within `rows` of the newly selected row. */
  onChange: (idx: number) => void;
}
278
+
279
/**
 * Dropdown for choosing which row to display. Options are labelled with each
 * row's `row_idx`, while the value reported to `onChange` is the position in
 * `rows`. Renders nothing when there is at most one row.
 */
function RowSelector({ rows, currentIdx, onChange }: RowSelectorProps) {
  if (rows.length <= 1) return null;

  const options = rows.map((entry, position) => (
    <option key={entry.row_idx} value={position}>
      #{entry.row_idx}
    </option>
  ));

  return (
    <div className="flex items-center gap-2">
      <span className="text-xs text-gray-500">Row:</span>
      <select
        value={currentIdx}
        onChange={(event) => onChange(Number(event.target.value))}
        className="text-xs bg-gray-800 border border-gray-700 rounded px-2 py-1 text-gray-200 focus:outline-none focus:border-cyan-600"
      >
        {options}
      </select>
    </div>
  );
}
299
+
300
+ // ─── Main Component ───────────────────────────────────────────────────────────
301
+
302
/**
 * Full-screen viewer that shows one dataset row as syntax-highlighted JSON.
 *
 * The first 100 rows of the split are fetched from the datasets-server `/rows`
 * endpoint; when more than one row is returned a selector lets the user
 * switch between them. The displayed value can be copied to the clipboard as
 * pretty-printed JSON. Escape or the × button closes the viewer.
 *
 * @param datasetRepo Dataset repo id; `HF_ORG` is prepended when it has no "/".
 * @param split Dataset split to read (defaults to "train").
 * @param onClose Called when the viewer should close.
 */
export default function YamlViewer({ datasetRepo, split = "train", onClose }: YamlViewerProps) {
  // Ensure dataset repo has org prefix for HF API calls
  const fullRepo = datasetRepo.includes("/") ? datasetRepo : `${HF_ORG}/${datasetRepo}`;

  const [rows, setRows] = useState<HfRow[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [selectedRowIdx, setSelectedRowIdx] = useState(0);
  const [copied, setCopied] = useState(false);

  // Monotonic request counter: only the most recent fetch may write to state.
  const fetchRef = useRef(0);
  // Handle of the pending "Copied!" reset timer, kept so it can be cancelled.
  const copiedTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  const fetchRows = useCallback(async () => {
    setLoading(true);
    setError(null);

    const fetchId = ++fetchRef.current;
    const baseUrl = "https://datasets-server.huggingface.co/rows";
    // Both user-supplied values are URL-encoded so characters such as "&" or
    // "#" in a split name cannot corrupt the query string.
    const datasetParam = encodeURIComponent(fullRepo);
    const paging = `split=${encodeURIComponent(split)}&offset=0&length=100`;
    const urlWithConfig = `${baseUrl}?dataset=${datasetParam}&config=default&${paging}`;
    const urlWithoutConfig = `${baseUrl}?dataset=${datasetParam}&${paging}`;

    let data: HfResponse | null = null;

    try {
      const res = await fetch(urlWithConfig);
      if (res.ok) {
        data = await res.json() as HfResponse;
      } else {
        // Try without config param
        const res2 = await fetch(urlWithoutConfig);
        if (res2.ok) {
          data = await res2.json() as HfResponse;
        } else {
          const errText = await res2.text();
          throw new Error(`API error ${res2.status}: ${errText.slice(0, 300)}`);
        }
      }
    } catch (e) {
      // Ignore failures of requests that have since been superseded.
      if (fetchRef.current === fetchId) {
        setError(e instanceof Error ? e.message : "Failed to fetch dataset");
        setLoading(false);
      }
      return;
    }

    // A newer request was started while this one was in flight; drop the result.
    if (fetchRef.current !== fetchId) return;

    if (data) {
      setRows(data.rows);
      setSelectedRowIdx(0);
    }
    setLoading(false);
  }, [fullRepo, split]);

  useEffect(() => {
    fetchRows();
  }, [fetchRows]);

  // Close on Escape
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      if (e.key === "Escape") onClose();
    };
    window.addEventListener("keydown", handler);
    return () => window.removeEventListener("keydown", handler);
  }, [onClose]);

  // Cancel a pending "Copied!" reset when the viewer unmounts so the timer
  // never fires against an unmounted component.
  useEffect(() => {
    return () => {
      if (copiedTimerRef.current !== null) clearTimeout(copiedTimerRef.current);
    };
  }, []);

  // ── Derive parsed data for the selected row ──────────────────────────────
  const selectedRow = rows[selectedRowIdx] ?? null;

  /**
   * Convert a dataset row into displayable JSON data.
   *
   * Strategy:
   * 1. If the row has exactly one column and its value is a string, display
   *    the result of JSON.parse on it; if it does not parse, display the row
   *    object as-is.
   * 2. Otherwise, for the first column named "config", "yaml", "json",
   *    "data", "content" or "value" (checked in that order) that holds a
   *    string which parses as JSON, display the parsed value.
   * 3. Otherwise display the whole row object directly.
   */
  const parsedData: unknown = (() => {
    if (!selectedRow) return null;
    const rowObj = selectedRow.row;
    const keys = Object.keys(rowObj);

    // Try single-column string parse
    if (keys.length === 1) {
      const val = rowObj[keys[0]];
      if (typeof val === "string") {
        try {
          return JSON.parse(val);
        } catch {
          // Not valid JSON — fall through to raw display
          return rowObj;
        }
      }
    }

    // Try known config column names
    const configColNames = ["config", "yaml", "json", "data", "content", "value"];
    for (const col of configColNames) {
      if (col in rowObj && typeof rowObj[col] === "string") {
        try {
          return JSON.parse(rowObj[col] as string);
        } catch {
          // Not valid JSON — fall through
        }
      }
    }

    // Default: show the whole row
    return rowObj;
  })();

  // ── Canonical JSON string for copy-to-clipboard ──────────────────────────
  const jsonString = parsedData !== null
    ? JSON.stringify(parsedData, null, 2)
    : "";

  const handleCopy = useCallback(async () => {
    try {
      await navigator.clipboard.writeText(jsonString);
      setCopied(true);
      // Restart the two-second window on every successful copy; otherwise an
      // earlier timer would reset the label too soon.
      if (copiedTimerRef.current !== null) clearTimeout(copiedTimerRef.current);
      copiedTimerRef.current = setTimeout(() => {
        copiedTimerRef.current = null;
        setCopied(false);
      }, 2000);
    } catch {
      // Copying is best-effort: if the clipboard write is rejected the label
      // simply stays "Copy JSON".
    }
  }, [jsonString]);

  const shortName = datasetRepo.split("/").pop() ?? datasetRepo;
  const orgName = datasetRepo.includes("/") ? datasetRepo.split("/")[0] : "";

  return (
    <div className="fixed inset-0 z-40 bg-gray-950 flex flex-col">
      {/* ── Top bar ──────────────────────────────────────────────────────── */}
      <div className="flex items-center justify-between px-4 py-3 border-b border-gray-800 flex-shrink-0 bg-gray-900">
        <div className="flex items-center gap-3 min-w-0">
          <span className="text-xs text-gray-500 uppercase tracking-wider font-medium flex-shrink-0">
            Config Viewer
          </span>
          <span className="text-gray-600">·</span>
          <span
            className="text-sm font-medium text-gray-200 truncate"
            title={datasetRepo}
          >
            {shortName}
          </span>
          {orgName && (
            <span
              className="text-xs text-gray-500 truncate hidden sm:block"
              title={datasetRepo}
            >
              ({orgName})
            </span>
          )}
        </div>

        <div className="flex items-center gap-3 flex-shrink-0">
          {/* Row selector (only if multiple rows) */}
          {rows.length > 1 && (
            <RowSelector
              rows={rows}
              currentIdx={selectedRowIdx}
              onChange={setSelectedRowIdx}
            />
          )}

          {/* Copy button */}
          {!loading && !error && jsonString && (
            <button
              onClick={handleCopy}
              className="text-xs px-3 py-1.5 rounded bg-gray-700 hover:bg-gray-600 text-gray-300 border border-gray-600 transition-colors"
              title="Copy entire config as JSON"
            >
              {copied ? "Copied!" : "Copy JSON"}
            </button>
          )}

          {/* HuggingFace link */}
          <a
            href={`https://huggingface.co/datasets/${fullRepo}`}
            target="_blank"
            rel="noopener noreferrer"
            className="text-gray-500 hover:text-cyan-400 transition-colors"
            title="Open on HuggingFace"
          >
            <svg
              xmlns="http://www.w3.org/2000/svg"
              className="h-4 w-4"
              fill="none"
              viewBox="0 0 24 24"
              stroke="currentColor"
              strokeWidth={2}
            >
              <path
                strokeLinecap="round"
                strokeLinejoin="round"
                d="M10 6H6a2 2 0 00-2 2v10a2 2 0 002 2h10a2 2 0 002-2v-4M14 4h6m0 0v6m0-6L10 14"
              />
            </svg>
          </a>

          {/* Close */}
          <button
            onClick={onClose}
            className="text-gray-400 hover:text-gray-200 transition-colors text-xl leading-none px-1"
            title="Close viewer (Esc)"
          >
            ×
          </button>
        </div>
      </div>

      {/* ── Body ─────────────────────────────────────────────────────────── */}
      <div className="flex-1 overflow-auto bg-gray-950">
        {/* Loading */}
        {loading && (
          <div className="flex items-center justify-center h-32">
            <div className="flex items-center gap-3 text-gray-400">
              <svg
                className="animate-spin h-5 w-5"
                xmlns="http://www.w3.org/2000/svg"
                fill="none"
                viewBox="0 0 24 24"
              >
                <circle
                  className="opacity-25"
                  cx="12"
                  cy="12"
                  r="10"
                  stroke="currentColor"
                  strokeWidth="4"
                />
                <path
                  className="opacity-75"
                  fill="currentColor"
                  d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4z"
                />
              </svg>
              <span className="text-sm">Loading config…</span>
            </div>
          </div>
        )}

        {/* Error */}
        {error && !loading && (
          <div className="flex flex-col items-center justify-center h-32 gap-3">
            <p className="text-sm text-red-400">{error}</p>
            <button
              onClick={fetchRows}
              className="text-xs px-3 py-1.5 rounded bg-gray-700 hover:bg-gray-600 text-gray-300 border border-gray-600 transition-colors"
            >
              Retry
            </button>
          </div>
        )}

        {/* Empty */}
        {!loading && !error && rows.length === 0 && (
          <div className="flex items-center justify-center h-32">
            <p className="text-sm text-gray-500 italic">No data found in this dataset.</p>
          </div>
        )}

        {/* Syntax-highlighted JSON */}
        {!loading && !error && parsedData !== null && (
          <div className="p-6">
            <pre className="font-mono text-sm leading-relaxed whitespace-pre bg-gray-950 rounded-lg p-4 border border-gray-800 overflow-x-auto">
              <JsonRoot data={parsedData} />
            </pre>
          </div>
        )}
      </div>

      {/* ── Footer hint ──────────────────────────────────────────────────── */}
      {!loading && !error && parsedData !== null && (
        <div className="px-4 py-2 border-t border-gray-800 bg-gray-900 flex-shrink-0 flex items-center justify-between">
          <span className="text-xs text-gray-600">
            Click top-level keys to collapse/expand · Press Esc to close
          </span>
          <span className="text-xs text-gray-600">
            {rows.length > 1 ? `${rows.length} rows available` : ""}
          </span>
        </div>
      )}
    </div>
  );
}
frontend/src/experiments/store.ts ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useCallback, useEffect } from "react";
2
+ import type { Experiment, ExperimentDetail, SubExperiment, ExperimentNote } from "./types";
3
+ import { experimentsApi } from "./api";
4
+ import { parseHash, navigateTo as hashNavigateTo } from "../hashRouter";
5
+
6
/**
 * Which screen of the experiments section is showing.
 * Discriminated on `kind`; the id fields name the experiment and, where
 * relevant, the sub-experiment or note being viewed.
 */
export type View =
  | { kind: "list" }
  | { kind: "summary" }
  | { kind: "detail"; expId: string }
  | { kind: "sub"; expId: string; subId: string }
  | { kind: "note"; expId: string; noteId: string };
12
+
13
/**
 * React hook that owns all state for the experiments section: the experiment
 * list, the currently opened experiment / sub-experiment / note, the summary
 * markdown, the active view, and the shared loading / error flags.
 *
 * The `navigateTo*` helpers update both this state and the URL hash. The hook
 * also restores its state from the hash on mount and on browser back/forward.
 */
export function useExperimentsState() {
  const [experiments, setExperiments] = useState<Experiment[]>([]);
  const [currentDetail, setCurrentDetail] = useState<ExperimentDetail | null>(null);
  const [currentSub, setCurrentSub] = useState<SubExperiment | null>(null);
  const [currentNote, setCurrentNote] = useState<ExperimentNote | null>(null);
  const [summaryContent, setSummaryContent] = useState<string>("");
  const [view, setView] = useState<View>({ kind: "list" });
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  /** Fetch the experiment list; failures are reported through `error`. */
  const loadExperiments = useCallback(async () => {
    setLoading(true);
    setError(null);
    try {
      const data = await experimentsApi.list();
      setExperiments(data);
    } catch (e) {
      setError(e instanceof Error ? e.message : "Failed to load experiments");
    } finally {
      setLoading(false);
    }
  }, []);

  /**
   * Fetch the summary markdown and switch to the summary view.
   * Resolves to `true` on success so the caller can decide whether to update
   * the hash. Shared by hash restoration and explicit navigation.
   */
  const showSummary = useCallback(async (): Promise<boolean> => {
    setLoading(true);
    setError(null);
    try {
      const data = await experimentsApi.getSummary();
      setSummaryContent(data.content_md || "");
      setView({ kind: "summary" });
      return true;
    } catch (e) {
      setError(e instanceof Error ? e.message : "Failed to load summary");
      return false;
    } finally {
      setLoading(false);
    }
  }, []);

  // Restore a view from hash segments (no hash update — used on mount and popstate)
  const restoreFromSegments = useCallback(async (segments: string[]) => {
    if (segments.length === 0) {
      setView({ kind: "list" });
      setCurrentDetail(null);
      setCurrentSub(null);
      setCurrentNote(null);
      await loadExperiments();
      return;
    }

    if (segments[0] === "summary") {
      await showSummary();
      return;
    }

    const expId = segments[0];
    setLoading(true);
    setError(null);
    try {
      const detail = await experimentsApi.get(expId);
      setCurrentDetail(detail);

      if (segments[1] === "sub" && segments[2]) {
        const sub = detail.sub_experiments.find((s: SubExperiment) => s.id === segments[2]);
        if (sub) {
          setCurrentSub(sub);
          setView({ kind: "sub", expId, subId: segments[2] });
          return;
        }
      }

      if (segments[1] === "note" && segments[2]) {
        const note = (detail.experiment_notes || []).find((n: ExperimentNote) => n.id === segments[2]);
        if (note) {
          setCurrentNote(note);
          setView({ kind: "note", expId, noteId: segments[2] });
          return;
        }
      }

      // Unknown or missing sub/note id: fall back to the experiment's detail view.
      setView({ kind: "detail", expId });
    } catch (e) {
      setError(e instanceof Error ? e.message : "Failed to load experiment");
    } finally {
      setLoading(false);
    }
  }, [loadExperiments, showSummary]);

  // Restore from hash on mount
  useEffect(() => {
    const route = parseHash();
    if (route.page === "experiments" && route.segments.length > 0) {
      restoreFromSegments(route.segments);
    } else {
      loadExperiments();
    }
  }, [restoreFromSegments, loadExperiments]);

  // Handle browser back/forward
  useEffect(() => {
    const handler = () => {
      const route = parseHash();
      if (route.page !== "experiments") return;
      restoreFromSegments(route.segments);
    };
    window.addEventListener("popstate", handler);
    return () => window.removeEventListener("popstate", handler);
  }, [restoreFromSegments]);

  /** Show the experiment list, drop any opened experiment, and reload the list. */
  const navigateToList = useCallback(() => {
    setView({ kind: "list" });
    setCurrentDetail(null);
    setCurrentSub(null);
    setCurrentNote(null);
    loadExperiments();
    hashNavigateTo({ page: "experiments", segments: [], params: new URLSearchParams() });
  }, [loadExperiments]);

  /** Fetch one experiment and show its detail view; the hash is only updated on success. */
  const navigateToDetail = useCallback(async (expId: string) => {
    setLoading(true);
    setError(null);
    try {
      const detail = await experimentsApi.get(expId);
      setCurrentDetail(detail);
      setView({ kind: "detail", expId });
      hashNavigateTo({ page: "experiments", segments: [expId], params: new URLSearchParams() });
    } catch (e) {
      setError(e instanceof Error ? e.message : "Failed to load experiment");
    } finally {
      setLoading(false);
    }
  }, []);

  /** Open a sub-experiment of the loaded experiment; a no-op when nothing is loaded or the id is unknown. */
  const navigateToSub = useCallback((expId: string, subId: string) => {
    if (!currentDetail) return;
    const sub = currentDetail.sub_experiments.find((s: SubExperiment) => s.id === subId);
    if (sub) {
      setCurrentSub(sub);
      setView({ kind: "sub", expId, subId });
      hashNavigateTo({ page: "experiments", segments: [expId, "sub", subId] });
    }
  }, [currentDetail]);

  /** Open a note of the loaded experiment; a no-op when nothing is loaded or the id is unknown. */
  const navigateToNote = useCallback((expId: string, noteId: string) => {
    if (!currentDetail) return;
    const note = (currentDetail.experiment_notes || []).find((n: ExperimentNote) => n.id === noteId);
    if (note) {
      setCurrentNote(note);
      setView({ kind: "note", expId, noteId });
      hashNavigateTo({ page: "experiments", segments: [expId, "note", noteId] });
    }
  }, [currentDetail]);

  /** Load and show the summary view; the hash is only updated on success. */
  const navigateToSummary = useCallback(async () => {
    if (await showSummary()) {
      hashNavigateTo({ page: "experiments", segments: ["summary"] });
    }
  }, [showSummary]);

  /**
   * Silently re-fetch the experiment behind the current view.
   * The opened sub-experiment / note is re-resolved from the fresh detail so
   * those views do not keep rendering a stale snapshot. If the item no longer
   * exists, the previous snapshot is kept.
   */
  const refreshDetail = useCallback(async () => {
    if (view.kind !== "detail" && view.kind !== "sub" && view.kind !== "note") return;
    const current = view;
    try {
      const detail = await experimentsApi.get(current.expId);
      setCurrentDetail(detail);
      if (current.kind === "sub") {
        const subId = current.subId;
        const sub = detail.sub_experiments.find((s: SubExperiment) => s.id === subId);
        if (sub) setCurrentSub(sub);
      } else if (current.kind === "note") {
        const noteId = current.noteId;
        const note = (detail.experiment_notes || []).find((n: ExperimentNote) => n.id === noteId);
        if (note) setCurrentNote(note);
      }
    } catch {
      // silent refresh failure
    }
  }, [view]);

  return {
    experiments,
    currentDetail,
    currentSub,
    currentNote,
    summaryContent,
    view,
    loading,
    error,
    setError,
    navigateToList,
    navigateToDetail,
    navigateToSub,
    navigateToNote,
    navigateToSummary,
    refreshDetail,
    loadExperiments,
  };
}
frontend/src/experiments/types.ts ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** A repo reference listed under `hf_repos` on experiments and sub-experiments. */
export interface HfRepo {
  repo: string;
  description: string;
  date: string;
}
6
+
7
+ // --- Live dashboard state types ---
8
+
9
/** Live state of one job, as stored in the values of `Experiment.live_jobs`. */
export interface LiveJobState {
  status: "pending" | "running" | "completed" | "failed" | "blocked";
  cluster: string;
  gpus: number;
  partition?: string;
  slurm_job_id?: string;
  submitted_at: string;
  updated_at: string;
  estimated_completion?: string;
  message: string;
  metrics: Record<string, number | string>;
  /** Optional blocker details (reason and start time). */
  blocker?: { reason: string; since: string };
}
22
+
23
/** One entry of `Experiment.live_history`. */
export interface LiveHistoryEntry {
  timestamp: string;
  event: string;
  job_id?: string;
  cluster?: string;
  message: string;
}
30
+
31
/** Value type of `Experiment.unreachable_clusters`: why a cluster is unreachable and since when. */
export interface UnreachableClusterInfo {
  reason: string;
  since: string;
}
35
+
36
/** Raw live status carried on an experiment (`Experiment.live_status`). */
export type LiveStatus =
  | "active"
  | "paused"
  | "completed"
  | "idle";
37
/** Status shown in the UI; computed by `deriveDisplayStatus` from the live status, jobs and clusters. */
export type DisplayStatus =
  | "active"
  | "blocked"
  | "partially_blocked"
  | "paused"
  | "completed"
  | "unreachable"
  | "idle";
38
+
39
/** The hypothesis attached to an experiment (`Experiment.hypothesis`). */
export interface Hypothesis {
  statement: string;
  type: "comparative" | "ablation" | "exploration" | "reproduction";
  status: "pending" | "active" | "supported" | "invalidated" | "inconclusive" | "exploring";
  success_criteria: string;
}
45
+
46
/** Stage value stored on `Experiment.stage`. */
export type Stage =
  | "idea"
  | "planned"
  | "active"
  | "concluded"
  | "inconclusive";
47
+
48
/**
 * An experiment as returned by the list endpoint.
 * The optional `live_*` / `unreachable_clusters` fields carry live dashboard
 * state and may be absent.
 */
export interface Experiment {
  id: string;
  name: string;
  research_project: string;
  hypothesis: Hypothesis;
  stage: Stage;
  completeness: number;
  models: string[];
  tasks: string[];
  tags: string[];
  hf_repos: HfRepo[];
  wandb_url: string;
  notes: string;
  zayne_summary: string;
  zayne_readme: string;
  zayne_findings: string;
  zayne_decisions: string;
  red_team_brief: string;
  created: string;
  updated: string;

  // Optional counts
  run_count?: number;
  sub_count?: number;
  note_count?: number;

  // Live dashboard state (merged from dashboard_state.json)
  live_status?: LiveStatus;
  live_message?: string;
  live_jobs?: Record<string, LiveJobState>;
  unreachable_clusters?: Record<string, UnreachableClusterInfo>;
  live_history?: LiveHistoryEntry[];
  live_started_at?: string;
  live_updated_at?: string;
}
80
+
81
/** One run belonging to an experiment (`ExperimentDetail.runs`). */
export interface RunRecord {
  id: string;
  experiment_id: string;
  condition: string;
  model: string;
  cluster: string;
  status: "running" | "completed" | "failed";
  hf_dataset: string;
  metrics: Record<string, number | string>;
  timestamp: string;
  notes: string;
}
93
+
94
/** A sub-experiment nested under an experiment; `content_md` holds its markdown body. */
export interface SubExperiment {
  id: string;
  experiment_id: string;
  name: string;
  hypothesis: string;
  status: string;
  content_md: string;
  hf_repos: HfRepo[];
  created: string;
  updated: string;
}
105
+
106
/** A note attached to an experiment; `content_md` holds its markdown body. */
export interface ExperimentNote {
  id: string;
  experiment_id: string;
  title: string;
  filename: string;
  relative_path: string;
  content_md: string;
  created: string;
  updated: string;
}
116
+
117
+ // --- Activity Log types ---
118
+
119
/** Category of an activity-log entry. */
export type ActivityEntryType =
  | "action"
  | "result"
  | "note"
  | "milestone";
120
+
121
/** One entry of an experiment's activity log (`ExperimentDetail.activity_log`). */
export interface ActivityLogEntry {
  timestamp: string;
  scope: string;
  type: ActivityEntryType;
  message: string;
  artifacts: string[];
  run_ids: string[];
  author: "agent" | "user";
}
130
+
131
+ // --- Artifact types ---
132
+
133
/** Kind of data an artifact holds (`Artifact.artifact_type`). */
export type ArtifactType =
  | "input_data"
  | "inference_output"
  | "training_config"
  | "canary_output"
  | "eval_result"
  | "processed_data";
140
+
141
/** Viewer associated with an artifact (`Artifact.visualizer_type`). */
export type VisualizerType =
  | "model_trace"
  | "table"
  | "yaml_config"
  | "plotly"
  | "image"
  | "heatmap"
  | "custom";
149
+
150
/**
 * A dataset artifact linked to an experiment (`ExperimentDetail.artifacts`).
 * Classification fields are nullable.
 */
export interface Artifact {
  dataset_name: string;
  experiment_id: string;
  run_id: string | null;
  artifact_type: ArtifactType | null;
  visualizer_type: VisualizerType | null;
  artifact_group: string | null;
  parent_artifact: string | null;
  size_bytes: number | null;
  description: string;
  model: string;
  /** Typed as a plain string here, unlike `Experiment.tags` which is an array. */
  tags: string;
  created: string;
  updated: string;
}
165
+
166
/** Full experiment payload: the list-level `Experiment` plus its nested collections. */
export interface ExperimentDetail extends Experiment {
  runs: RunRecord[];
  sub_experiments: SubExperiment[];
  experiment_notes: ExperimentNote[];
  activity_log: ActivityLogEntry[];
  artifacts: Artifact[];
}
173
+
174
+ // --- Display status derivation ---
175
+
176
/**
 * Compute the status badge to show for an experiment.
 *
 * Precedence, first match wins:
 *  1. `live_status` of "completed" or "paused" is returned as-is.
 *  2. "unreachable": some cluster is unreachable and no job is running.
 *  3. "blocked": there are jobs and every one of them is blocked.
 *  4. "partially_blocked": at least one blocked and at least one running job.
 *  5. "active": `live_status` is "active" or any job is running.
 *  6. "idle" otherwise.
 */
export function deriveDisplayStatus(exp: Experiment): DisplayStatus {
  switch (exp.live_status) {
    case "completed":
    case "paused":
      return exp.live_status;
  }

  const allJobs = Object.values(exp.live_jobs || {});
  let running = 0;
  let blocked = 0;
  for (const job of allJobs) {
    if (job.status === "running") running += 1;
    else if (job.status === "blocked") blocked += 1;
  }

  const anyUnreachable = Object.keys(exp.unreachable_clusters || {}).length > 0;
  if (anyUnreachable && running === 0) return "unreachable";

  if (blocked > 0) {
    if (blocked === allJobs.length) return "blocked";
    if (running > 0) return "partially_blocked";
  }

  return exp.live_status === "active" || running > 0 ? "active" : "idle";
}
189
+
190
/** Map a display status to the Tailwind background + text classes for its badge. */
export function statusBadgeColor(status: DisplayStatus): string {
  switch (status) {
    case "active":
      return "bg-green-100 text-green-800";
    case "blocked":
      return "bg-red-100 text-red-800";
    case "partially_blocked":
      return "bg-orange-100 text-orange-800";
    case "paused":
      return "bg-yellow-100 text-yellow-800";
    case "completed":
      return "bg-blue-100 text-blue-800";
    case "unreachable":
      return "bg-gray-200 text-orange-700";
    case "idle":
      return "bg-gray-100 text-gray-500";
  }
}
frontend/src/hashRouter.ts ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useCallback } from "react";
2
+
3
/**
 * Parsed form of the URL hash, e.g. `#/viz/<tab>/<segments…>?<params>` or
 * `#/experiments/<segments…>?<params>`.
 */
export interface HashRoute {
  /** Top-level section of the app. */
  page: "experiments" | "viz";
  /** Second path component under "viz"; the empty string for "experiments". */
  tab: string;
  /** Remaining path components after the page (and tab, for "viz"). */
  segments: string[];
  /** Query string that follows `?` inside the hash. */
  params: URLSearchParams;
}
9
+
10
+ const ROUTE_CHANGE = "routechange";
11
+ const STORAGE_KEY = "agg-viz-route";
12
+
13
/**
 * Return the route hash last persisted to localStorage, or "" when nothing is
 * stored or storage cannot be read. Used as a fallback when the URL hash is
 * empty (e.g. inside an iframe).
 */
function getSavedHash(): string {
  let stored: string | null;
  try {
    stored = localStorage.getItem(STORAGE_KEY);
  } catch {
    stored = null;
  }
  return stored || "";
}
21
+
22
/**
 * Remember `hash` in localStorage so an iframe reload can restore the route.
 * Storage failures are deliberately ignored — persistence is best-effort.
 */
function saveHash(hash: string): void {
  try {
    localStorage.setItem(STORAGE_KEY, hash);
  } catch {
    // localStorage unavailable — ignore
  }
}
30
+
31
/**
 * Hash to route on: the URL hash when it carries a path, otherwise the value
 * saved in localStorage. "", "#" and "#/" all count as empty.
 */
function effectiveHash(): string {
  const urlHash = window.location.hash;
  const isBlank = !urlHash || urlHash === "#" || urlHash === "#/";
  return isBlank ? getSavedHash() : urlHash;
}
37
+
38
/**
 * Parse a location hash into a {@link HashRoute}.
 *
 * @param hash Hash to parse (with or without the leading `#/`). When omitted,
 *   the effective hash is used: the URL hash, or the localStorage fallback.
 * @returns The parsed route. An empty hash yields the experiments page with no
 *   segments; any first path component other than "viz" is treated as
 *   experiments.
 *
 * Path components are percent-decoded. `window.location.hash` returns
 * percent-encoded text for spaces and non-ASCII characters, so without
 * decoding an id such as "my exp" would come back as "my%20exp" and never
 * match. Components that are not valid percent-encoding are kept verbatim.
 */
export function parseHash(hash?: string): HashRoute {
  const raw = (hash ?? effectiveHash()).replace(/^#\/?/, "");
  if (!raw) {
    return { page: "experiments", tab: "", segments: [], params: new URLSearchParams() };
  }

  const qIdx = raw.indexOf("?");
  const pathPart = qIdx >= 0 ? raw.slice(0, qIdx) : raw;
  const params = new URLSearchParams(qIdx >= 0 ? raw.slice(qIdx + 1) : "");

  const decode = (part: string): string => {
    try {
      return decodeURIComponent(part);
    } catch {
      // Malformed escape sequence (e.g. a literal "%"): keep the raw text.
      return part;
    }
  };
  // Split first, decode second, so an encoded "/" (%2F) stays inside one segment.
  const parts = pathPart.split("/").filter(Boolean).map(decode);

  if (parts[0] === "viz") {
    return { page: "viz", tab: parts[1] || "model", segments: parts.slice(2), params };
  }

  // "experiments" or anything else defaults to experiments
  const segments = parts[0] === "experiments" ? parts.slice(1) : parts;
  return { page: "experiments", tab: "", segments, params };
}
57
+
58
/**
 * Serialize a route back into a hash string of the form
 * `#/<page>[/<tab>][/<segments…>][?<params>]`. The tab is only written for
 * the "viz" page, and the query string only when it is non-empty.
 */
function buildHash(route: HashRoute): string {
  const isViz = route.page === "viz";
  const path = [
    isViz ? "viz" : "experiments",
    ...(isViz && route.tab ? [route.tab] : []),
    ...route.segments,
  ].join("/");
  const query = route.params.toString();
  return query ? `#/${path}?${query}` : `#/${path}`;
}
67
+
68
/**
 * Persist `hash` and, if it differs from the current URL hash, write it to the
 * browser history and announce the change via a ROUTE_CHANGE event.
 *
 * @param push `true` adds a history entry; `false` replaces the current one.
 */
function applyRoute(hash: string, push: boolean) {
  // Always persist, even when the URL already matches.
  saveHash(hash);

  const alreadyCurrent = window.location.hash === hash;
  if (alreadyCurrent) return;

  const history = window.history;
  if (push) history.pushState(null, "", hash);
  else history.replaceState(null, "", hash);

  // pushState/replaceState fire no event of their own, so notify listeners explicitly.
  window.dispatchEvent(new Event(ROUTE_CHANGE));
}
78
+
79
/**
 * Navigate to a new route, creating a browser history entry.
 *
 * `update` is merged over the current route. When the page or tab changes,
 * any `segments` / `params` not given explicitly are reset instead of being
 * inherited. A page change without an explicit tab selects that page's
 * default tab ("model" for viz, "" otherwise).
 */
export function navigateTo(update: Partial<HashRoute>) {
  const current = parseHash();
  const pageChanged = update.page !== undefined && update.page !== current.page;
  const tabChanged = update.tab !== undefined && update.tab !== current.tab;
  const resetChildren = pageChanged || tabChanged;

  const defaultTabForPage = update.page === "viz" ? "model" : "";
  const inheritedTab = pageChanged ? defaultTabForPage : current.tab;

  const next: HashRoute = {
    page: update.page ?? current.page,
    tab: update.tab ?? inheritedTab,
    segments: update.segments ?? (resetChildren ? [] : current.segments),
    params: update.params ?? (resetChildren ? new URLSearchParams() : current.params),
  };
  applyRoute(buildHash(next), true);
}
93
+
94
/**
 * Replace the current route without adding a history entry. Intended for
 * frequent state changes such as indices. Fields missing from `update` are
 * always inherited from the current route — unlike `navigateTo`, nothing is
 * reset.
 */
export function replaceRoute(update: Partial<HashRoute>) {
  const { page, tab, segments, params } = parseHash();
  const next: HashRoute = {
    page: update.page ?? page,
    tab: update.tab ?? tab,
    segments: update.segments ?? segments,
    params: update.params ?? params,
  };
  applyRoute(buildHash(next), false);
}
105
+
106
/**
 * Build a shareable direct URL for the current route (bypasses the HF iframe):
 * the app's own origin and path followed by the effective hash.
 */
export function getShareableUrl(): string {
  // Use the app's direct origin (works for both .hf.space and localhost)
  const { origin, pathname } = window.location;
  return origin + pathname + effectiveHash();
}
112
+
113
/**
 * Hook returning the current {@link HashRoute}. It re-renders whenever the
 * route changes: programmatic navigation (ROUTE_CHANGE), browser back/forward
 * (popstate), or a manual hash edit (hashchange).
 */
export function useHashRoute(): HashRoute {
  const [route, setRoute] = useState<HashRoute>(parseHash);

  useEffect(() => {
    const sync = () => setRoute(parseHash());
    const eventNames = [ROUTE_CHANGE, "popstate", "hashchange"];
    for (const name of eventNames) window.addEventListener(name, sync);
    return () => {
      for (const name of eventNames) window.removeEventListener(name, sync);
    };
  }, []);

  // On mount, if URL hash is empty but localStorage has a saved route, apply it
  useEffect(() => {
    const urlHash = window.location.hash;
    const hasUrlRoute = Boolean(urlHash) && urlHash !== "#" && urlHash !== "#/";
    if (hasUrlRoute) return;

    const saved = getSavedHash();
    if (!saved) return;

    window.history.replaceState(null, "", saved);
    setRoute(parseHash(saved));
  }, []);

  return route;
}
143
+
144
/**
 * Hook for the copy-link button.
 *
 * @returns `copyLink`, which copies the shareable URL to the clipboard, and
 *   `copied`, which is true for two seconds after a successful copy. If the
 *   clipboard write fails, the URL is shown in a prompt instead.
 */
export function useCopyLink() {
  const [copied, setCopied] = useState(false);

  const copyLink = useCallback(async () => {
    const shareUrl = getShareableUrl();
    let succeeded = true;
    try {
      await navigator.clipboard.writeText(shareUrl);
    } catch {
      succeeded = false;
    }

    if (!succeeded) {
      // Fallback: prompt user
      window.prompt("Copy this link:", shareUrl);
      return;
    }

    setCopied(true);
    setTimeout(() => setCopied(false), 2000);
  }, []);

  return { copyLink, copied };
}
frontend/src/index.css ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @tailwind base;
2
+ @tailwind components;
3
+ @tailwind utilities;
4
+
5
+ /* Custom scrollbar for trace panels */
6
+ .trace-scroll::-webkit-scrollbar {
7
+ width: 6px;
8
+ }
9
+ .trace-scroll::-webkit-scrollbar-track {
10
+ background: transparent;
11
+ }
12
+ .trace-scroll::-webkit-scrollbar-thumb {
13
+ background: #4b5563;
14
+ border-radius: 3px;
15
+ }
16
+ .trace-scroll::-webkit-scrollbar-thumb:hover {
17
+ background: #6b7280;
18
+ }
19
+
20
+ /* Drag-to-reorder panel feedback — themed per visualizer */
21
+ .theme-model .panel-drop-target {
22
+ outline: 2px dashed #60a5fa;
23
+ outline-offset: -2px;
24
+ border-radius: 0.5rem;
25
+ background: rgba(96, 165, 250, 0.05);
26
+ }
27
+
28
+ .theme-arena .panel-drop-target {
29
+ outline: 2px dashed #a78bfa;
30
+ outline-offset: -2px;
31
+ border-radius: 0.5rem;
32
+ background: rgba(167, 139, 250, 0.05);
33
+ }
34
+
35
+ .theme-rlm .panel-drop-target {
36
+ outline: 2px dashed #fb923c;
37
+ outline-offset: -2px;
38
+ border-radius: 0.5rem;
39
+ background: rgba(251, 146, 60, 0.05);
40
+ }
41
+
42
+ .theme-harbor .panel-drop-target {
43
+ outline: 2px dashed #2dd4bf;
44
+ outline-offset: -2px;
45
+ border-radius: 0.5rem;
46
+ background: rgba(45, 212, 191, 0.05);
47
+ }
48
+
49
+ /* Code block styling (used by Harbor visualizer) */
50
+ .code-block {
51
+ font-family: "JetBrains Mono", "Fira Code", "Cascadia Code", monospace;
52
+ font-size: 0.8rem;
53
+ line-height: 1.4;
54
+ }
55
+
56
+ .drag-handle {
57
+ cursor: grab;
58
+ }
59
+
60
+ .drag-handle:active {
61
+ cursor: grabbing;
62
+ }
63
+
64
+ /* ── Light theme overrides ─────────────────────────────────
65
+ When .light is on <html>, remap dark grays to light equivalents.
66
+ This avoids rewriting every component — the Tailwind classes stay
67
+ the same, but the underlying colors change. */
68
+
69
+ .light {
70
+ /* Background overrides */
71
+ --tw-bg-opacity: 1;
72
+ }
73
+
74
+ /* Backgrounds: dark → light */
75
+ .light .bg-gray-950 { background-color: #ffffff !important; }
76
+ .light .bg-gray-900 { background-color: #f9fafb !important; }
77
+ .light .bg-gray-900\/80 { background-color: rgba(249, 250, 251, 0.8) !important; }
78
+ .light .bg-gray-900\/50 { background-color: rgba(249, 250, 251, 0.5) !important; }
79
+ .light .bg-gray-800 { background-color: #f3f4f6 !important; }
80
+ .light .bg-gray-800\/50 { background-color: rgba(243, 244, 246, 0.5) !important; }
81
+ .light .bg-gray-700 { background-color: #e5e7eb !important; }
82
+
83
+ /* Text: light on dark → dark on light */
84
+ .light .text-gray-100 { color: #111827 !important; }
85
+ .light .text-gray-200 { color: #1f2937 !important; }
86
+ .light .text-gray-300 { color: #374151 !important; }
87
+ .light .text-gray-400 { color: #6b7280 !important; }
88
+ .light .text-gray-500 { color: #9ca3af !important; }
89
+ .light .text-gray-600 { color: #d1d5db !important; }
90
+
91
+ /* Borders */
92
+ .light .border-gray-700 { border-color: #e5e7eb !important; }
93
+ .light .border-gray-800 { border-color: #f3f4f6 !important; }
94
+ .light .border-gray-800\/50 { border-color: rgba(243, 244, 246, 0.5) !important; }
95
+
96
+ /* Accent colors stay vibrant but slightly adjusted for contrast */
97
+ .light .text-cyan-400 { color: #0891b2 !important; }
98
+ .light .text-cyan-400\/80 { color: rgba(8, 145, 178, 0.8) !important; }
99
/* .border-cyan-500 is remapped in the "Border accents" group below; a second declaration here would be overridden by it. */
100
+ .light .text-emerald-300 { color: #059669 !important; }
101
+ .light .text-emerald-400 { color: #059669 !important; }
102
+ .light .text-amber-400 { color: #d97706 !important; }
103
+ .light .text-amber-400\/80 { color: rgba(217, 119, 6, 0.8) !important; }
104
+ .light .text-amber-400\/70 { color: rgba(217, 119, 6, 0.7) !important; }
105
+ .light .text-violet-400 { color: #7c3aed !important; }
106
+ .light .text-red-400 { color: #dc2626 !important; }
107
+ .light .text-yellow-400 { color: #ca8a04 !important; }
108
+ .light .text-green-400 { color: #16a34a !important; }
109
+
110
+ /* Accent backgrounds */
111
+ .light .bg-emerald-950\/30 { background-color: rgba(5, 150, 105, 0.08) !important; }
112
+ .light .bg-emerald-900 { background-color: rgba(5, 150, 105, 0.15) !important; }
113
+ .light .bg-violet-950\/30 { background-color: rgba(124, 58, 237, 0.08) !important; }
114
+ .light .bg-orange-900\/20 { background-color: rgba(234, 88, 12, 0.08) !important; }
115
+ .light .bg-red-900\/10 { background-color: rgba(220, 38, 38, 0.05) !important; }
116
+ .light .bg-cyan-600 { background-color: #0891b2 !important; }
117
+
118
+ /* Border accents */
119
+ .light .border-emerald-500 { border-color: #059669 !important; }
120
+ .light .border-emerald-700 { border-color: #059669 !important; }
121
+ .light .border-amber-500 { border-color: #d97706 !important; }
122
+ .light .border-violet-500 { border-color: #7c3aed !important; }
123
+ .light .border-cyan-500 { border-color: #0891b2 !important; }
124
+ .light .border-orange-800\/50 { border-color: rgba(234, 88, 12, 0.3) !important; }
125
+
126
+ /* Hover states */
127
+ .light .hover\:bg-gray-800:hover { background-color: #e5e7eb !important; }
128
+ .light .hover\:bg-gray-900\/50:hover { background-color: rgba(249, 250, 251, 0.5) !important; }
129
+ .light .hover\:text-gray-200:hover { color: #1f2937 !important; }
130
+ .light .hover\:text-gray-300:hover { color: #374151 !important; }
131
+ .light .hover\:border-gray-700:hover { border-color: #d1d5db !important; }
132
+ .light .hover\:border-gray-400:hover { border-color: #9ca3af !important; }
133
+ .light .hover\:text-cyan-300:hover { color: #0891b2 !important; }
134
+
135
+ /* Inputs and selects */
136
+ .light select { background-color: #f3f4f6 !important; color: #374151 !important; border-color: #d1d5db !important; }
137
+ .light input { background-color: #f9fafb !important; color: #374151 !important; border-color: #d1d5db !important; }
138
+
139
+ /* Scrollbar */
140
+ .light .trace-scroll::-webkit-scrollbar-thumb { background: #d1d5db; }
141
+ .light .trace-scroll::-webkit-scrollbar-thumb:hover { background: #9ca3af; }
142
+
143
+ /* Markdown code blocks */
144
+ .light pre { background-color: #f3f4f6 !important; }
145
+ .light code { background-color: #f3f4f6 !important; color: #374151 !important; }
frontend/src/main.tsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import ReactDOM from "react-dom/client";
3
+ import App from "./App";
4
+ import "./index.css";
5
+
6
// Mount the app. Fail with a clear message if the host page lacks the #root
// container, rather than asserting non-null and failing inside React.
const rootElement = document.getElementById("root");
if (!rootElement) {
  throw new Error('Cannot mount app: element with id "root" was not found');
}

ReactDOM.createRoot(rootElement).render(
  <React.StrictMode>
    <App />
  </React.StrictMode>
);
frontend/src/model/ModelApp.tsx ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect, useCallback, useRef, useState } from "react";
2
+ import { useAppState } from "./store";
3
+ import Sidebar from "./components/Sidebar";
4
+ import TracePanel, { type DragHandleProps } from "./components/TracePanel";
5
+ import InfoBar from "./components/InfoBar";
6
+ import QuestionNav from "./components/QuestionNav";
7
+ import type { DatasetInfo, QuestionData, Preset } from "./types";
8
+ import { api } from "./api";
9
+
10
/**
 * Root component of the model viewer: a sidebar for datasets and presets, an
 * error banner, the info bar, the reorderable trace panels, and the
 * question/sample navigation. All data comes from `useAppState`.
 *
 * Keyboard shortcuts (ignored while typing in a form field or when a modifier
 * key is held): `j` / `k` move to the next / previous question, `l` / `h` to
 * the next / previous sample.
 */
export default function ModelApp() {
  const state = useAppState();

  const handleLoadPreset = useCallback(async (preset: Preset) => {
    await state.addDataset(preset.repo, preset.column, preset.split, undefined, preset.id, preset.name);
  }, [state.addDataset]);

  const handleSavePreset = useCallback(async (name: string, repo: string, column: string, split?: string) => {
    const preset = await api.createPreset(name, repo, column, split);
    state.setPresets((prev) => [...prev, preset]);
  }, [state.setPresets]);

  const handleDeletePreset = useCallback(async (id: string, datasetId?: string) => {
    await api.deletePreset(id);
    state.setPresets((prev) => prev.filter((p) => p.id !== id));
    if (datasetId) {
      state.clearDatasetPreset(datasetId);
    }
  }, [state.setPresets, state.clearDatasetPreset]);

  const handleUpdatePreset = useCallback(async (presetId: string, datasetId: string, updates: { name?: string }) => {
    const updated = await api.updatePreset(presetId, updates);
    state.setPresets((prev) => prev.map((p) => (p.id === presetId ? updated : p)));
    if (updates.name) {
      state.updateDatasetPresetName(datasetId, updates.name);
    }
  }, [state.setPresets, state.updateDatasetPresetName]);

  // Keyboard shortcuts
  useEffect(() => {
    const handler = (e: KeyboardEvent) => {
      // Leave browser / OS shortcuts such as Ctrl+L or Cmd+K alone.
      if (e.ctrlKey || e.metaKey || e.altKey) return;

      // Don't hijack keystrokes meant for form controls or editable content.
      const target = e.target;
      if (
        target instanceof HTMLInputElement ||
        target instanceof HTMLTextAreaElement ||
        target instanceof HTMLSelectElement ||
        (target instanceof HTMLElement && target.isContentEditable)
      ) {
        return;
      }

      // Clamp the upper bound at 0 so an empty dataset (max === 0) cannot
      // drive the index to -1.
      const lastQuestion = Math.max(0, state.maxQuestions - 1);
      const lastSample = Math.max(0, state.maxSamples - 1);

      switch (e.key) {
        case "j":
          state.setQuestionIdx((prev) => Math.min(lastQuestion, prev + 1));
          break;
        case "k":
          state.setQuestionIdx((prev) => Math.max(0, prev - 1));
          break;
        case "l":
          state.setSampleIdx((prev) => Math.min(lastSample, prev + 1));
          break;
        case "h":
          state.setSampleIdx((prev) => Math.max(0, prev - 1));
          break;
      }
    };
    window.addEventListener("keydown", handler);
    return () => window.removeEventListener("keydown", handler);
  }, [state.maxQuestions, state.maxSamples, state.setQuestionIdx, state.setSampleIdx]);

  return (
    <div className="h-full flex overflow-hidden">
      <Sidebar
        datasets={state.datasets}
        presets={state.presets}
        loading={state.loading}
        groups={state.groups}
        groupIds={state.groupIds}
        currentGroupId={state.currentGroupId}
        onAddDataset={state.addDataset}
        onRemoveDataset={state.removeDataset}
        onToggleDataset={state.toggleDataset}
        onSetCurrentGroup={state.setCurrentGroupId}
        onLoadPreset={handleLoadPreset}
        onSavePreset={handleSavePreset}
        onDeletePreset={handleDeletePreset}
        onUpdatePreset={handleUpdatePreset}
      />

      <div className="flex-1 flex flex-col min-w-0">
        {/* Error banner */}
        {state.error && (
          <div className="px-4 py-2 bg-red-900/50 border-b border-red-700 text-red-300 text-sm flex items-center justify-between">
            <span>{state.error}</span>
            <button onClick={() => state.setError(null)} className="text-red-400 hover:text-red-300 ml-2">
              Dismiss
            </button>
          </div>
        )}

        <InfoBar
          activeDatasets={state.activeDatasets}
          questionIdx={state.questionIdx}
          sampleIdx={state.sampleIdx}
          getQuestionData={state.getQuestionData}
        />

        {/* Trace panels (drag to reorder) */}
        <PanelContainer
          datasets={state.orderedActiveDatasets}
          getQuestionData={state.getQuestionData}
          sampleIdx={state.sampleIdx}
          onReorder={state.reorderPanels}
        />

        <QuestionNav
          questionIdx={state.questionIdx}
          sampleIdx={state.sampleIdx}
          maxQuestions={state.maxQuestions}
          maxSamples={state.maxSamples}
          filter={state.filter}
          onQuestionChange={state.setQuestionIdx}
          onSampleChange={state.setSampleIdx}
          onFilterChange={state.setFilter}
        />
      </div>
    </div>
  );
}
120
+
121
+ /* ── Drag-to-reorder panel container ── */
122
+
123
/** Props for the drag-to-reorder container that lays out one trace panel per dataset. */
interface PanelContainerProps {
  /** Datasets to render, in display order. */
  datasets: DatasetInfo[];
  /** Looks up the current question data for a dataset id; may return undefined. */
  getQuestionData: (dsId: string) => QuestionData | undefined;
  /** Sample index forwarded to every panel. */
  sampleIdx: number;
  /** Called when the panel `fromId` is dropped onto the panel `toId`. */
  onReorder: (fromId: string, toId: string) => void;
}
129
+
130
/**
 * Renders one TracePanel per dataset in a horizontal row and lets the user
 * reorder panels by dragging a panel's handle onto another panel. Shows a
 * placeholder when no dataset is active.
 */
function PanelContainer({ datasets, getQuestionData, sampleIdx, onReorder }: PanelContainerProps) {
  const [draggedId, setDraggedId] = useState<string | null>(null);
  const [overId, setOverId] = useState<string | null>(null);
  // Per-panel dragenter/dragleave balance. The drop highlight is cleared only
  // when the balance returns to zero (presumably because both events also fire
  // for a panel's descendants).
  const dragCounter = useRef<Record<string, number>>({});

  const handleDragStart = useCallback((e: React.DragEvent, id: string) => {
    setDraggedId(id);
    e.dataTransfer.effectAllowed = "move";
    // Attach a payload: Firefox has historically refused to start a drag when
    // no data is set. A private MIME type keeps text inputs from accepting
    // the drop as pasted text.
    e.dataTransfer.setData("application/x-trace-panel-id", id);
    // Use a transparent 1x1 image so the browser doesn't clone the panel
    const ghost = document.createElement("canvas");
    ghost.width = 1;
    ghost.height = 1;
    e.dataTransfer.setDragImage(ghost, 0, 0);
  }, []);

  const handleDragEnd = useCallback(() => {
    setDraggedId(null);
    setOverId(null);
    dragCounter.current = {};
  }, []);

  const handleDragEnter = useCallback((e: React.DragEvent, id: string) => {
    e.preventDefault();
    dragCounter.current[id] = (dragCounter.current[id] || 0) + 1;
    setOverId(id);
  }, []);

  const handleDragLeave = useCallback((_e: React.DragEvent, id: string) => {
    dragCounter.current[id] = (dragCounter.current[id] || 0) - 1;
    if (dragCounter.current[id] <= 0) {
      dragCounter.current[id] = 0;
      // Only clear the highlight if it still belongs to this panel.
      setOverId((prev) => (prev === id ? null : prev));
    }
  }, []);

  const handleDragOver = useCallback((e: React.DragEvent) => {
    // preventDefault marks the panel as a valid drop target.
    e.preventDefault();
    e.dataTransfer.dropEffect = "move";
  }, []);

  const handleDrop = useCallback((e: React.DragEvent, targetId: string) => {
    e.preventDefault();
    if (draggedId && draggedId !== targetId) {
      onReorder(draggedId, targetId);
    }
    setDraggedId(null);
    setOverId(null);
    dragCounter.current = {};
  }, [draggedId, onReorder]);

  if (datasets.length === 0) {
    return (
      <div className="flex-1 flex gap-2 p-2 overflow-x-auto min-h-0">
        <div className="flex-1 flex items-center justify-center text-gray-500">
          <div className="text-center">
            <p className="text-lg mb-2">No repos active</p>
            <p className="text-sm">Add a HuggingFace repo from the sidebar to get started</p>
          </div>
        </div>
      </div>
    );
  }

  return (
    <div className="flex-1 flex gap-2 p-2 overflow-x-auto min-h-0">
      {datasets.map((ds) => {
        const isDragged = draggedId === ds.id;
        const isOver = overId === ds.id && draggedId !== null && draggedId !== ds.id;

        const handleProps: DragHandleProps = {
          draggable: true,
          onDragStart: (e) => handleDragStart(e, ds.id),
          onDragEnd: handleDragEnd,
        };

        return (
          <div
            key={ds.id}
            onDragEnter={(e) => handleDragEnter(e, ds.id)}
            onDragLeave={(e) => handleDragLeave(e, ds.id)}
            onDragOver={handleDragOver}
            onDrop={(e) => handleDrop(e, ds.id)}
            className={`flex-1 min-w-0 transition-all duration-150 ${
              isDragged ? "opacity-30 scale-[0.97]" : ""
            } ${isOver ? "panel-drop-target" : ""}`}
          >
            <TracePanel
              datasetName={ds.presetName || ds.name}
              repoName={ds.presetName ? ds.name : undefined}
              data={getQuestionData(ds.id)}
              sampleIdx={sampleIdx}
              dragHandleProps={handleProps}
            />
          </div>
        );
      })}
    </div>
  );
}
frontend/src/model/api.ts ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { DatasetInfo, QuestionData, DatasetSummary, Preset } from "./types";
2
+
3
+ const BASE = "/api/model";
4
+ const PRESETS_BASE = "/api/presets/model";
5
+
6
/**
 * Fetch `url` and decode the JSON response body as `T`.
 *
 * A `Content-Type: application/json` header is sent by default. Headers passed
 * through `opts.headers` are merged with that default instead of replacing it;
 * a caller-supplied `Content-Type` takes precedence.
 *
 * @param url  Request URL.
 * @param opts Additional `fetch` options (method, body, headers, ...).
 * @returns The parsed JSON body. Its shape is trusted, not validated.
 * @throws Error when the response is not OK. The message is the `error` field
 *   of the JSON error body when one is present, otherwise the HTTP status text.
 */
async function fetchJSON<T>(url: string, opts?: RequestInit): Promise<T> {
  // Merge rather than overwrite so callers can add headers without losing the default.
  const headers = new Headers(opts?.headers);
  if (!headers.has("Content-Type")) {
    headers.set("Content-Type", "application/json");
  }

  const res = await fetch(url, { ...opts, headers });
  if (!res.ok) {
    // The error body may be missing, non-JSON, or JSON that is not an object
    // (e.g. `null`); none of those should mask the HTTP failure itself.
    const body: unknown = await res.json().catch(() => null);
    const detail =
      typeof body === "object" && body !== null
        ? (body as { error?: unknown }).error
        : undefined;
    throw new Error(detail ? String(detail) : res.statusText);
  }
  return res.json();
}
17
+
18
/**
 * Typed client for the model-viewer backend.
 *
 * Dataset routes are built from `BASE`, preset routes from `PRESETS_BASE`.
 * Every call goes through `fetchJSON`, so each member returns a promise of the
 * decoded JSON body and rejects whenever `fetchJSON` does.
 */
export const api = {
  /** POST a load request; optional arguments are sent as `column`, `split` and `prompt_column`. */
  loadDataset: (repo: string, column?: string, split?: string, promptColumn?: string) => {
    const payload = { repo, column, split, prompt_column: promptColumn };
    return fetchJSON<DatasetInfo & { columns: string[]; question_fingerprint: string }>(
      `${BASE}/datasets/load`,
      { method: "POST", body: JSON.stringify(payload) },
    );
  },

  /** GET the dataset list. */
  listDatasets: () => fetchJSON<DatasetInfo[]>(`${BASE}/datasets/`),

  /** GET the question at index `idx` of dataset `dsId`. */
  getQuestion: (dsId: string, idx: number) => {
    const url = `${BASE}/datasets/${dsId}/question/${idx}`;
    return fetchJSON<QuestionData>(url);
  },

  /** GET the summary of dataset `dsId`. */
  getSummary: (dsId: string) => {
    const url = `${BASE}/datasets/${dsId}/summary`;
    return fetchJSON<DatasetSummary>(url);
  },

  /** DELETE dataset `dsId`. */
  unloadDataset: (dsId: string) => {
    const url = `${BASE}/datasets/${dsId}`;
    return fetchJSON<{ status: string }>(url, { method: "DELETE" });
  },

  /** GET the preset list. */
  listPresets: () => fetchJSON<Preset[]>(PRESETS_BASE),

  /** POST a new preset built from `name`, `repo`, `column` and the optional `split`. */
  createPreset: (name: string, repo: string, column: string, split?: string) => {
    const payload = { name, repo, column, split };
    return fetchJSON<Preset>(PRESETS_BASE, {
      method: "POST",
      body: JSON.stringify(payload),
    });
  },

  /** PUT `updates` onto preset `id`. */
  updatePreset: (id: string, updates: { name?: string; column?: string; split?: string }) => {
    const url = `${PRESETS_BASE}/${id}`;
    return fetchJSON<Preset>(url, { method: "PUT", body: JSON.stringify(updates) });
  },

  /** DELETE preset `id`. */
  deletePreset: (id: string) => {
    const url = `${PRESETS_BASE}/${id}`;
    return fetchJSON<{ status: string }>(url, { method: "DELETE" });
  },
};
frontend/src/model/components/InfoBar.tsx ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { DatasetInfo, QuestionData } from "../types";
2
+
3
/** Props accepted by the `InfoBar` component. */
interface InfoBarProps {
  /** Datasets to report on; the first entry supplies the question text and sample count. */
  activeDatasets: Array<DatasetInfo>;

  /** Question index printed in the `Q<n>:` prefix. */
  questionIdx: number;

  /** Zero-based index of the sample that is highlighted and reported per repo. */
  sampleIdx: number;

  /** Returns the question data held for a dataset id, or `undefined` when there is none. */
  getQuestionData: (dsId: string) => QuestionData | undefined;
}
9
+
10
/**
 * Strip showing the current question, a per-sample correctness bar and the
 * per-repo verdict for the selected sample.
 *
 * The question text and sample count are taken from the first active
 * dataset's question data. While that data is missing, or its question text
 * is empty, a placeholder hint is rendered instead.
 *
 * A verdict that is absent for a dataset/sample pair is treated as "unknown":
 * it is shown as `?` in neutral grey, never as a wrong answer.
 */
export default function InfoBar({ activeDatasets, questionIdx, sampleIdx, getQuestionData }: InfoBarProps) {
  // Resolve each dataset's question data once per render; the sample bar
  // below would otherwise repeat the lookup for every sample cell.
  const dataByDataset = activeDatasets.map((ds) => getQuestionData(ds.id));
  const firstData = dataByDataset[0];
  const questionText = firstData?.question ?? "";
  const nSamples = firstData?.n_samples ?? 0;

  if (!questionText) {
    return (
      <div className="px-4 py-3 border-b border-gray-700 bg-gray-900/80">
        <p className="text-sm text-gray-500 italic">Load repos and select a question to begin</p>
      </div>
    );
  }

  return (
    <div className="px-4 py-3 border-b border-gray-700 bg-gray-900/80">
      {/* Question text */}
      <div className="text-sm text-gray-200 font-medium mb-2 leading-relaxed max-h-24 overflow-y-auto">
        Q{questionIdx}: {questionText}
      </div>

      {/* Sample bar */}
      {nSamples > 1 && (
        <div className="flex items-center gap-1 flex-wrap">
          <span className="text-[10px] text-gray-500 mr-1">Samples:</span>
          {Array.from({ length: nSamples }, (_, i) => {
            const results = dataByDataset.map((d) => d?.eval_correct[i]);
            const allCorrect = results.every((r) => r === true);
            const someCorrect = results.some((r) => r === true);
            const noneCorrect = results.every((r) => r === false);

            // Grey stays when no repo is correct but at least one verdict is unknown.
            let bgColor = "bg-gray-700";
            if (allCorrect) bgColor = "bg-green-700";
            else if (someCorrect) bgColor = "bg-yellow-700";
            else if (noneCorrect) bgColor = "bg-red-900";

            const isSelected = i === sampleIdx;
            const tooltip = results
              .map((r, j) => `${activeDatasets[j]?.name}=${r == null ? "?" : r ? "correct" : "wrong"}`)
              .join(", ");

            return (
              <span
                key={i}
                className={`inline-block w-4 h-4 rounded-sm text-[9px] text-center leading-4 font-mono ${bgColor} ${
                  isSelected ? "ring-2 ring-blue-400 ring-offset-1 ring-offset-gray-900" : ""
                }`}
                title={`Sample ${i + 1}: ${tooltip}`}
              >
                {i + 1}
              </span>
            );
          })}
          <span className="text-[10px] text-gray-600 ml-2">
            <span className="inline-block w-2.5 h-2.5 rounded-sm bg-green-700 mr-0.5 align-middle" /> all
            <span className="inline-block w-2.5 h-2.5 rounded-sm bg-yellow-700 mr-0.5 ml-1.5 align-middle" /> some
            <span className="inline-block w-2.5 h-2.5 rounded-sm bg-red-900 mr-0.5 ml-1.5 align-middle" /> none
          </span>
        </div>
      )}

      {/* Per-repo correctness for current sample */}
      <div className="flex items-center gap-3 mt-1.5 flex-wrap">
        {activeDatasets.map((ds, j) => {
          const correct = dataByDataset[j]?.eval_correct[sampleIdx];
          const unknown = correct == null;
          const verdictClass = unknown ? "text-gray-500" : correct ? "text-green-400" : "text-red-400";
          return (
            <span key={ds.id} className="text-[11px]">
              <span className="text-gray-500">{ds.name}: </span>
              <span className={verdictClass}>
                {unknown ? "?" : correct ? "Correct" : "Wrong"}
              </span>
            </span>
          );
        })}
      </div>
    </div>
  );
}
frontend/src/model/components/QuestionNav.tsx ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { FilterMode } from "../types";
2
+
3
/** Props accepted by the `QuestionNav` component. */
interface QuestionNavProps {
  /** Current question index; shown in the number input. */
  questionIdx: number;

  /** Current zero-based sample index; displayed one-based. */
  sampleIdx: number;

  /** Number of questions; valid question indices are `0 .. maxQuestions - 1`. */
  maxQuestions: number;

  /** Number of samples; the sample controls are rendered only when this exceeds 1. */
  maxSamples: number;

  /** Filter mode whose button is drawn as selected. */
  filter: FilterMode;

  /** Called with the requested question index. */
  onQuestionChange: (idx: number) => void;

  /** Called with the requested sample index. */
  onSampleChange: (idx: number) => void;

  /** Called with the filter mode of the button that was clicked. */
  onFilterChange: (filter: FilterMode) => void;
}
13
+
14
/** Filter buttons in display order: the mode passed to `onFilterChange` and its caption. */
const FILTERS: Array<{ value: FilterMode; label: string }> = [
  {
    value: "all",
    label: "All",
  },
  {
    value: "improvements",
    label: "Improvements",
  },
  {
    value: "regressions",
    label: "Regressions",
  },
  {
    value: "both-correct",
    label: "Both Correct",
  },
  {
    value: "both-wrong",
    label: "Both Wrong",
  },
];
21
+
22
/**
 * Bottom navigation bar: question stepper with a direct-entry index field,
 * sample stepper (only when there is more than one sample), filter buttons
 * and a keyboard-shortcut legend.
 *
 * Stepping never requests an index below 0 (previous) or above the last
 * index (next). Values typed into the index field are forwarded only when
 * they parse to an integer inside `0 .. maxQuestions - 1`.
 */
export default function QuestionNav(props: QuestionNavProps) {
  const {
    questionIdx, sampleIdx, maxQuestions, maxSamples,
    filter, onQuestionChange, onSampleChange, onFilterChange,
  } = props;

  // All four stepper buttons share one look.
  const stepButtonClass =
    "px-2 py-1 text-xs bg-gray-800 hover:bg-gray-700 disabled:opacity-40 rounded border border-gray-600 text-gray-300 transition-colors";

  const lastQuestion = maxQuestions - 1;
  const lastSample = maxSamples - 1;

  function handleQuestionInput(raw: string) {
    const typed = parseInt(raw);
    if (isNaN(typed)) return;
    if (typed < 0 || typed >= maxQuestions) return;
    onQuestionChange(typed);
  }

  function filterButtonClass(mode: FilterMode) {
    const tone =
      filter === mode
        ? "bg-blue-600 border-blue-500 text-white"
        : "bg-gray-800 border-gray-600 text-gray-400 hover:bg-gray-700";
    return `px-2 py-1 text-[10px] rounded border transition-colors ${tone}`;
  }

  return (
    <div className="px-4 py-2 border-t border-gray-700 bg-gray-900/80 flex items-center justify-between flex-wrap gap-2">
      {/* Question navigation */}
      <div className="flex items-center gap-2">
        <button
          onClick={() => onQuestionChange(Math.max(0, questionIdx - 1))}
          disabled={questionIdx <= 0}
          className={stepButtonClass}
        >
          &larr; Prev Q
        </button>
        <div className="flex items-center gap-1">
          <span className="text-xs text-gray-500">Q</span>
          <input
            type="number"
            value={questionIdx}
            onChange={(e) => handleQuestionInput(e.target.value)}
            className="w-16 px-1.5 py-1 text-xs text-center bg-gray-800 border border-gray-600 rounded text-gray-200 focus:border-blue-500 focus:outline-none"
          />
          <span className="text-xs text-gray-500">/ {maxQuestions > 0 ? maxQuestions - 1 : 0}</span>
        </div>
        <button
          onClick={() => onQuestionChange(Math.min(lastQuestion, questionIdx + 1))}
          disabled={questionIdx >= lastQuestion}
          className={stepButtonClass}
        >
          Next Q &rarr;
        </button>
      </div>

      {/* Sample navigation */}
      {maxSamples > 1 && (
        <div className="flex items-center gap-2">
          <button
            onClick={() => onSampleChange(Math.max(0, sampleIdx - 1))}
            disabled={sampleIdx <= 0}
            className={stepButtonClass}
          >
            &larr; Prev S
          </button>
          <span className="text-xs text-gray-400">
            Sample {sampleIdx + 1}/{maxSamples}
          </span>
          <button
            onClick={() => onSampleChange(Math.min(lastSample, sampleIdx + 1))}
            disabled={sampleIdx >= lastSample}
            className={stepButtonClass}
          >
            Next S &rarr;
          </button>
        </div>
      )}

      {/* Filter */}
      <div className="flex items-center gap-1">
        {FILTERS.map(({ value, label }) => (
          <button
            key={value}
            onClick={() => onFilterChange(value)}
            className={filterButtonClass(value)}
          >
            {label}
          </button>
        ))}
      </div>

      {/* Keyboard hints */}
      <div className="text-[10px] text-gray-600">
        <kbd className="px-1 bg-gray-800 rounded">j</kbd>/<kbd className="px-1 bg-gray-800 rounded">k</kbd> question
        {" "}
        <kbd className="px-1 bg-gray-800 rounded">h</kbd>/<kbd className="px-1 bg-gray-800 rounded">l</kbd> sample
      </div>
    </div>
  );
}
frontend/src/model/components/Sidebar.tsx ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from "react";
2
+ import type { DatasetInfo, Preset } from "../types";
3
+
4
// Palette used to tell dataset groups apart. Each entry holds the Tailwind
// classes for a group's dot (bg), rail (border), text and header label.
// Class names are spelled out in full rather than assembled from parts.
const GROUP_COLORS = [
  {
    bg: "bg-blue-500",
    border: "border-blue-500",
    text: "text-blue-400",
    label: "text-blue-300",
  },
  {
    bg: "bg-emerald-500",
    border: "border-emerald-500",
    text: "text-emerald-400",
    label: "text-emerald-300",
  },
  {
    bg: "bg-amber-500",
    border: "border-amber-500",
    text: "text-amber-400",
    label: "text-amber-300",
  },
  {
    bg: "bg-purple-500",
    border: "border-purple-500",
    text: "text-purple-400",
    label: "text-purple-300",
  },
  {
    bg: "bg-rose-500",
    border: "border-rose-500",
    text: "text-rose-400",
    label: "text-rose-300",
  },
  {
    bg: "bg-cyan-500",
    border: "border-cyan-500",
    text: "text-cyan-400",
    label: "text-cyan-300",
  },
];
13
+
14
/** Props accepted by the `Sidebar` component. */
interface SidebarProps {
  /** All loaded datasets; when empty the "No repos loaded" hint is shown. */
  datasets: Array<DatasetInfo>;

  /** Saved presets, rendered as clickable chips. */
  presets: Array<Preset>;

  /** Loading flags; the Load button looks one up by the trimmed repo input. */
  loading: Record<string, boolean>;

  /** Datasets of each group, keyed by group id. */
  groups: Record<string, DatasetInfo[]>;

  /** Group ids in display order; the position selects the group's number and colour. */
  groupIds: string[];

  /** Id of the group marked as "viewing", or `null` when there is none. */
  currentGroupId: string | null;

  /** Called from the add form; blank optional fields are passed as `undefined`. */
  onAddDataset: (repo: string, column?: string, split?: string, promptColumn?: string) => void;

  /** Called with a dataset id when its remove button is clicked. */
  onRemoveDataset: (id: string) => void;

  /** Called with a dataset id when its checkbox changes. */
  onToggleDataset: (id: string) => void;

  /** Called with a group id when its header is clicked. */
  onSetCurrentGroup: (groupId: string) => void;

  /** Called with the preset whose chip was clicked. */
  onLoadPreset: (preset: Preset) => void;

  /** Called with the new preset name plus the repo, column and split of the dataset being saved. */
  onSavePreset: (name: string, repo: string, column: string, split?: string) => void;

  /** Called with a preset id; the edit panel also passes the id of the dataset being edited. */
  onDeletePreset: (id: string, datasetId?: string) => void;

  /** Called from the edit panel with the preset id, the dataset id and the new name. */
  onUpdatePreset: (presetId: string, datasetId: string, updates: { name?: string }) => void;
}
30
+
31
/**
 * Left-hand sidebar: preset chips, loaded repos grouped by group id, an
 * inline preset rename/delete panel and the "add repo" form.
 *
 * Local state covers the add form fields, the preset search text, which
 * dataset is being saved as a preset, and which dataset's preset is being
 * edited. All data changes are delegated to the `on*` callbacks.
 */
export default function Sidebar({
  datasets, presets, loading,
  groups, groupIds, currentGroupId,
  onAddDataset, onRemoveDataset, onToggleDataset, onSetCurrentGroup,
  onLoadPreset, onSavePreset, onDeletePreset, onUpdatePreset,
}: SidebarProps) {
  const [showAddModal, setShowAddModal] = useState(false);
  const [repoInput, setRepoInput] = useState("");
  const [columnInput, setColumnInput] = useState("");
  const [splitInput, setSplitInput] = useState("train");
  const [promptColumnInput, setPromptColumnInput] = useState("");
  const [presetSearch, setPresetSearch] = useState("");
  // Track which dataset is currently being saved as a preset (by dataset id)
  const [savingPresetForId, setSavingPresetForId] = useState<string | null>(null);
  const [presetName, setPresetName] = useState("");
  // Track which dataset is selected for preset editing
  const [editingDatasetId, setEditingDatasetId] = useState<string | null>(null);
  const [editPresetName, setEditPresetName] = useState("");

  const trimmedRepo = repoInput.trim();
  const repoIsLoading = loading[trimmedRepo];

  /** Submit the add form. Ignored for a blank repo or while that repo is flagged as loading. */
  const handleAdd = () => {
    // Same condition that disables the Load button, so Enter cannot bypass it.
    if (!trimmedRepo || repoIsLoading) return;
    onAddDataset(
      trimmedRepo,
      columnInput.trim() || undefined,
      splitInput.trim() || undefined,
      promptColumnInput.trim() || undefined,
    );
    setRepoInput("");
    setShowAddModal(false);
  };

  /** Save `ds` as a preset under the typed name; ignored while the name is blank. */
  const handleSavePresetForRepo = (ds: DatasetInfo) => {
    const name = presetName.trim();
    if (!name) return;
    onSavePreset(name, ds.repo, ds.column, ds.split);
    setPresetName("");
    setSavingPresetForId(null);
  };

  // Presets matching the search text by name or repo (case-insensitive).
  const presetQuery = presetSearch.toLowerCase();
  const visiblePresets = presets.filter(
    (p) => !presetQuery || p.name.toLowerCase().includes(presetQuery) || p.repo.toLowerCase().includes(presetQuery),
  );

  // Dataset whose preset is open in the edit panel, if it still exists and has a preset.
  const editDs = editingDatasetId ? datasets.find((d) => d.id === editingDatasetId) : undefined;
  const editPresetId = editDs?.presetId;

  /** Rename the preset being edited; shared by the Enter key and the Save button. */
  const submitPresetRename = () => {
    const name = editPresetName.trim();
    if (!editDs || !editPresetId || !name) return;
    onUpdatePreset(editPresetId, editDs.id, { name });
    setEditingDatasetId(null);
  };

  return (
    <div className="w-72 min-w-72 bg-gray-900 border-r border-gray-700 flex flex-col h-full">
      {/* Presets section */}
      <div className="p-3 border-b border-gray-700">
        <div className="flex items-center justify-between mb-2">
          <h3 className="text-xs font-semibold text-gray-400 uppercase tracking-wider">Presets</h3>
        </div>
        {presets.length === 0 ? (
          <p className="text-xs text-gray-500 italic">No presets saved</p>
        ) : (
          <>
            {presets.length > 6 && (
              <input
                type="text"
                value={presetSearch}
                onChange={(e) => setPresetSearch(e.target.value)}
                placeholder="Search presets..."
                className="w-full px-2 py-1 mb-2 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
              />
            )}
            <div className="flex flex-wrap gap-1 max-h-32 overflow-y-auto">
              {visiblePresets.map((p) => (
                <div key={p.id} className="group relative">
                  <button
                    onClick={() => onLoadPreset(p)}
                    className="px-2 py-1 text-xs bg-gray-800 hover:bg-gray-700 rounded border border-gray-600 text-gray-300 transition-colors"
                    title={`${p.repo} (${p.column}, ${p.split ?? "train"})`}
                  >
                    {p.name}
                  </button>
                  <div className="hidden group-hover:flex absolute top-full left-0 mt-1 z-10 gap-1">
                    <button
                      onClick={() => onDeletePreset(p.id)}
                      className="px-1.5 py-0.5 text-[10px] bg-red-900 hover:bg-red-800 rounded text-red-300"
                    >
                      Delete
                    </button>
                  </div>
                </div>
              ))}
            </div>
          </>
        )}
      </div>

      {/* Datasets section — grouped by question fingerprint */}
      <div className="flex-1 overflow-y-auto p-3">
        <h3 className="text-xs font-semibold text-gray-400 uppercase tracking-wider mb-2">Loaded Repos</h3>
        {datasets.length === 0 ? (
          <p className="text-xs text-gray-500 italic">No repos loaded. Add one below.</p>
        ) : (
          <div className="space-y-3">
            {groupIds.map((gid, groupIndex) => {
              // Colours repeat once there are more groups than palette entries.
              const color = GROUP_COLORS[groupIndex % GROUP_COLORS.length];
              // Tolerate a group id with no entry in `groups`.
              const groupDatasets = groups[gid] ?? [];
              const isCurrentGroup = gid === currentGroupId;

              return (
                <div key={gid}>
                  {/* Group header — clickable to switch group */}
                  <button
                    onClick={() => onSetCurrentGroup(gid)}
                    className={`w-full flex items-center gap-1.5 mb-1 px-1 py-0.5 rounded transition-colors ${
                      isCurrentGroup ? "bg-gray-800" : "hover:bg-gray-800/50"
                    }`}
                  >
                    <span className={`inline-block w-2 h-2 rounded-full ${color.bg} shrink-0`} />
                    <span className={`text-[10px] font-semibold uppercase tracking-wider ${
                      isCurrentGroup ? color.label : "text-gray-500"
                    }`}>
                      Group {groupIndex + 1}
                      <span className="normal-case font-normal ml-1 text-gray-600">
                        ({groupDatasets.length} repo{groupDatasets.length !== 1 ? "s" : ""})
                      </span>
                    </span>
                    {isCurrentGroup && (
                      <span className="text-[9px] text-gray-600 ml-auto">viewing</span>
                    )}
                  </button>

                  {/* Repos in this group */}
                  <div className={`space-y-1 border-l-2 ml-1 pl-2 ${
                    isCurrentGroup ? color.border : "border-gray-700"
                  }`}>
                    {groupDatasets.map((ds) => (
                      <div key={ds.id}>
                        <div
                          onClick={() => {
                            // Only rows backed by a preset open the edit panel.
                            if (ds.presetId) {
                              setEditingDatasetId(editingDatasetId === ds.id ? null : ds.id);
                              setEditPresetName(ds.presetName || "");
                              setShowAddModal(false);
                            }
                          }}
                          className={`flex items-center gap-2 px-2 py-1.5 rounded text-sm transition-colors ${
                            ds.active ? "bg-gray-800" : "bg-gray-900 opacity-60"
                          } ${editingDatasetId === ds.id ? "ring-1 ring-blue-500" : ""} ${ds.presetId ? "cursor-pointer" : ""}`}
                        >
                          <input
                            type="checkbox"
                            checked={ds.active}
                            onChange={() => onToggleDataset(ds.id)}
                            onClick={(e) => e.stopPropagation()}
                            className="rounded border-gray-600 bg-gray-800 text-blue-500 focus:ring-blue-500 focus:ring-offset-0"
                          />
                          <div className="flex-1 min-w-0">
                            <div className="text-gray-200 truncate text-xs font-medium" title={ds.presetName ? `${ds.presetName}\n${ds.repo}` : ds.repo}>
                              {ds.presetName || ds.name}
                            </div>
                            <div className="text-[10px] text-gray-500">
                              {ds.column} | {ds.n_rows} rows | {ds.n_samples} samples
                            </div>
                          </div>
                          {/* Save as preset */}
                          <button
                            onClick={(e) => {
                              e.stopPropagation();
                              setSavingPresetForId(savingPresetForId === ds.id ? null : ds.id);
                              setPresetName("");
                            }}
                            className={`transition-colors shrink-0 ${
                              savingPresetForId === ds.id
                                ? "text-blue-400"
                                : "text-gray-600 hover:text-blue-400"
                            }`}
                            title="Save as preset"
                          >
                            <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M5 5a2 2 0 012-2h10a2 2 0 012 2v16l-7-3.5L5 21V5z" />
                            </svg>
                          </button>
                          {/* Remove */}
                          <button
                            onClick={(e) => { e.stopPropagation(); onRemoveDataset(ds.id); }}
                            className="text-gray-600 hover:text-red-400 transition-colors shrink-0"
                            title="Remove"
                          >
                            <svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
                            </svg>
                          </button>
                        </div>
                        {/* Inline preset name input */}
                        {savingPresetForId === ds.id && (
                          <div className="flex gap-1 mt-1 ml-6">
                            <input
                              type="text"
                              value={presetName}
                              onChange={(e) => setPresetName(e.target.value)}
                              onKeyDown={(e) => {
                                if (e.key === "Enter") handleSavePresetForRepo(ds);
                                if (e.key === "Escape") setSavingPresetForId(null);
                              }}
                              placeholder="Preset name..."
                              className="flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
                              autoFocus
                            />
                            <button
                              onClick={() => handleSavePresetForRepo(ds)}
                              className="px-2 py-1 text-xs bg-blue-600 hover:bg-blue-500 rounded text-white"
                            >
                              Save
                            </button>
                          </div>
                        )}
                      </div>
                    ))}
                  </div>
                </div>
              );
            })}
          </div>
        )}
      </div>

      {/* Preset edit panel */}
      {editDs && editPresetId ? (
        <div className="p-3 border-t border-gray-700 space-y-2">
          <div className="text-[10px] text-gray-500 uppercase font-semibold tracking-wider">Edit Preset</div>
          <input
            type="text"
            value={editPresetName}
            onChange={(e) => setEditPresetName(e.target.value)}
            onKeyDown={(e) => {
              if (e.key === "Enter") submitPresetRename();
              if (e.key === "Escape") setEditingDatasetId(null);
            }}
            placeholder="Preset name..."
            className="w-full px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
            autoFocus
          />
          <div className="flex gap-2">
            <button
              onClick={submitPresetRename}
              disabled={!editPresetName.trim()}
              className="flex-1 px-2 py-1 text-xs bg-blue-600 hover:bg-blue-500 disabled:bg-gray-700 disabled:text-gray-500 rounded text-white transition-colors"
            >
              Save
            </button>
            <button
              onClick={() => {
                onDeletePreset(editPresetId, editDs.id);
                setEditingDatasetId(null);
              }}
              className="px-2 py-1 text-xs bg-red-900 hover:bg-red-800 rounded text-red-300 transition-colors"
            >
              Delete
            </button>
            <button
              onClick={() => setEditingDatasetId(null)}
              className="px-2 py-1 text-xs bg-gray-700 hover:bg-gray-600 rounded text-gray-300 transition-colors"
            >
              Cancel
            </button>
          </div>
        </div>
      ) : null}

      {/* Add repo section */}
      <div className="p-3 border-t border-gray-700">
        {!showAddModal ? (
          <button
            onClick={() => {
              // Opening the form closes the edit panel and resets every field.
              setEditingDatasetId(null);
              setShowAddModal(true);
              setRepoInput("");
              setColumnInput("");
              setSplitInput("train");
              setPromptColumnInput("");
            }}
            className="w-full px-3 py-2 text-sm bg-blue-600 hover:bg-blue-500 rounded text-white font-medium transition-colors"
          >
            + Add Repo
          </button>
        ) : (
          <div className="space-y-2">
            <input
              type="text"
              value={repoInput}
              onChange={(e) => setRepoInput(e.target.value)}
              onKeyDown={(e) => e.key === "Enter" && handleAdd()}
              placeholder="org/dataset-name"
              className="w-full px-2 py-1.5 text-sm bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
              autoFocus
            />
            <div className="flex gap-2">
              <input
                type="text"
                value={columnInput}
                onChange={(e) => setColumnInput(e.target.value)}
                placeholder="Response col (auto-detect)"
                className="flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
              />
              <input
                type="text"
                value={splitInput}
                onChange={(e) => setSplitInput(e.target.value)}
                placeholder="Split"
                className="w-16 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
              />
            </div>
            <div className="flex gap-2">
              <input
                type="text"
                value={promptColumnInput}
                onChange={(e) => setPromptColumnInput(e.target.value)}
                placeholder="Prompt col (auto-detect)"
                className="flex-1 px-2 py-1 text-xs bg-gray-800 border border-gray-600 rounded text-gray-200 placeholder-gray-500 focus:border-blue-500 focus:outline-none"
              />
            </div>
            <div className="flex gap-2">
              <button
                onClick={handleAdd}
                disabled={!trimmedRepo || repoIsLoading}
                className="flex-1 px-2 py-1.5 text-sm bg-blue-600 hover:bg-blue-500 disabled:bg-gray-700 disabled:text-gray-500 rounded text-white transition-colors"
              >
                {repoIsLoading ? "Loading..." : "Load"}
              </button>
              <button
                onClick={() => setShowAddModal(false)}
                className="px-3 py-1.5 text-sm bg-gray-700 hover:bg-gray-600 rounded text-gray-300 transition-colors"
              >
                Cancel
              </button>
            </div>
          </div>
        )}
      </div>
    </div>
  );
}
frontend/src/model/components/TracePanel.tsx ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from "react";
2
+ import type { QuestionData } from "../types";
3
+ import { highlightTrace } from "../utils/traceHighlight";
4
+ import { parsePrompt, type ParsedMessage } from "../utils/promptParser";
5
+
6
/**
 * Attributes spread onto a panel's drag-handle element so the handle, rather
 * than the whole panel, starts a drag.
 */
export interface DragHandleProps {
  /** Always `true`: marks the handle element as draggable. */
  draggable: true;

  /** Invoked when a drag starts on the handle. */
  onDragStart: (e: React.DragEvent) => void;

  /** Invoked when a drag that started on the handle ends. */
  onDragEnd: (e: React.DragEvent) => void;
}
11
+
12
/** Props accepted by the `TracePanel` component. */
interface TracePanelProps {
  /** Title shown in the panel header. */
  datasetName: string;

  /** When given, appended on a second line of the title tooltip. */
  repoName?: string;

  /** Question data to render; `undefined` shows the "No data" placeholder. */
  data: QuestionData | undefined;

  /** Index of the sample whose verdict, analysis and extraction are shown. */
  sampleIdx: number;

  /** When truthy, the "Loading..." placeholder replaces the panel content. */
  isLoading?: boolean;

  /** When given, a drag handle is rendered and these attributes are spread onto it. */
  dragHandleProps?: DragHandleProps;
}
20
+
21
/**
 * One dataset's view of the current question and sample: a header with the
 * verdict badge and trace statistics, the extracted answer when there is one,
 * a collapsible prompt, the highlighted thinking trace and the final answer.
 *
 * The frame is green for a correct sample, red for a wrong one and grey when
 * no verdict exists at `sampleIdx`. A placeholder frame is shown instead
 * while loading or when `data` is missing.
 */
export default function TracePanel(props: TracePanelProps) {
  const { datasetName, repoName, data, sampleIdx, isLoading, dragHandleProps } = props;
  const [promptExpanded, setPromptExpanded] = useState(false);

  // Loading takes precedence over missing data; both use the same empty frame.
  if (isLoading || !data) {
    return (
      <div className="h-full border border-gray-700 rounded-lg flex items-center justify-center">
        <div className="text-gray-500 text-sm">{isLoading ? "Loading..." : "No data"}</div>
      </div>
    );
  }

  const isCorrect = data.eval_correct[sampleIdx];
  const analysis = data.analyses[sampleIdx];
  const extraction = data.extractions?.[sampleIdx];

  let borderColor = "border-gray-700";
  if (isCorrect !== undefined) {
    borderColor = isCorrect ? "border-green-600" : "border-red-600";
  }

  const thinkSegments = highlightTrace(analysis?.think_text || "");
  const answerText = analysis?.answer_text || "";
  const promptMessages = data.prompt_text ? parsePrompt(data.prompt_text) : [];

  const titleTooltip = repoName ? `${datasetName}\n${repoName}` : datasetName;
  const badgeTone = isCorrect ? "bg-green-900 text-green-300" : "bg-red-900 text-red-300";
  const togglePrompt = () => setPromptExpanded(!promptExpanded);

  return (
    <div className={`h-full border-2 ${borderColor} rounded-lg flex flex-col bg-gray-900/50`}>
      {/* Header */}
      <div className="px-3 py-2 border-b border-gray-700 flex items-center justify-between shrink-0">
        <div className="flex items-center gap-2 min-w-0">
          <span className="text-sm font-semibold text-gray-200 truncate" title={titleTooltip}>{datasetName}</span>
          {isCorrect !== undefined && (
            <span className={`px-1.5 py-0.5 text-[10px] rounded font-medium ${badgeTone}`}>
              {isCorrect ? "CORRECT" : "WRONG"}
            </span>
          )}
        </div>
        <div className="flex items-center gap-1.5 shrink-0 ml-2">
          <span className="text-[10px] text-gray-500">
            {analysis && (
              <>Think: {analysis.think_len.toLocaleString()} | BT: {analysis.backtracks}</>
            )}
          </span>
          {dragHandleProps && (
            <span
              {...dragHandleProps}
              title="Drag to reorder"
              className="drag-handle text-gray-600 hover:text-gray-400 transition-colors"
            >
              <svg width="14" height="14" viewBox="0 0 16 16" fill="currentColor">
                <circle cx="5" cy="3" r="1.5" />
                <circle cx="11" cy="3" r="1.5" />
                <circle cx="5" cy="8" r="1.5" />
                <circle cx="11" cy="8" r="1.5" />
                <circle cx="5" cy="13" r="1.5" />
                <circle cx="11" cy="13" r="1.5" />
              </svg>
            </span>
          )}
        </div>
      </div>

      {/* Extraction / extracted answer */}
      {extraction && (
        <div className="px-3 py-1.5 border-b border-gray-700/50 bg-gray-800/30 overflow-x-auto whitespace-nowrap">
          <span className="text-[10px] text-gray-500 uppercase font-medium">Extracted: </span>
          <span className="text-xs text-gray-300 font-mono">{extraction}</span>
        </div>
      )}

      {/* Trace content */}
      <div className="flex-1 overflow-y-auto trace-scroll px-3 py-2">
        {/* Prompt section — collapsible */}
        {promptMessages.length > 0 && (
          <div className="mb-3">
            <button
              onClick={togglePrompt}
              className="flex items-center gap-1 text-xs font-semibold text-gray-400 uppercase tracking-wider mb-1 hover:text-gray-300 transition-colors"
            >
              <span className="text-[10px]">{promptExpanded ? "\u25BC" : "\u25B6"}</span>
              Prompt ({promptMessages.length} message{promptMessages.length !== 1 ? "s" : ""})
            </button>
            {promptExpanded && (
              <div className="space-y-1.5">
                {promptMessages.map((msg, i) => (
                  <PromptMessage key={i} message={msg} />
                ))}
              </div>
            )}
          </div>
        )}

        {/* Thinking section */}
        <div className="mb-3">
          <div className="text-xs font-semibold text-gray-400 uppercase tracking-wider mb-1">
            Thinking ({analysis?.think_len.toLocaleString() || 0} chars)
          </div>
          <pre className="text-xs leading-relaxed whitespace-pre-wrap font-mono">
            {thinkSegments.map((seg, i) => (
              <span key={i} className={seg.className}>{seg.text}</span>
            ))}
          </pre>
        </div>

        {/* Answer section */}
        {answerText && (
          <div>
            <div className="text-xs font-semibold text-gray-400 uppercase tracking-wider mb-1">
              Answer ({analysis?.answer_len.toLocaleString() || 0} chars)
            </div>
            <pre className="text-xs leading-relaxed whitespace-pre-wrap font-mono text-gray-100 font-bold">
              {answerText}
            </pre>
          </div>
        )}
      </div>
    </div>
  );
}
151
+
152
/**
 * Tailwind classes per prompt-message role: left border colour, role-label
 * colour and background tint. The `prompt` entry doubles as the fallback for
 * roles that have no entry of their own.
 */
const ROLE_STYLES: Record<string, { border: string; label: string; bg: string }> = {
  system: {
    border: "border-l-purple-500",
    label: "text-purple-400",
    bg: "bg-purple-500/5",
  },
  user: {
    border: "border-l-blue-500",
    label: "text-blue-400",
    bg: "bg-blue-500/5",
  },
  assistant: {
    border: "border-l-green-500",
    label: "text-green-400",
    bg: "bg-green-500/5",
  },
  prompt: {
    border: "border-l-gray-500",
    label: "text-gray-400",
    bg: "bg-gray-500/5",
  },
};
158
+
159
/**
 * Render one parsed prompt message: a role label above the message content,
 * coloured according to the role.
 *
 * Roles without their own entry in `ROLE_STYLES` use the `prompt` style.
 */
function PromptMessage({ message }: { message: ParsedMessage }) {
  // Own-property check: a plain lookup would resolve a role such as
  // "constructor" or "toString" to an inherited Object.prototype member,
  // which is truthy and would bypass the fallback with undefined classes.
  const hasOwnStyle = Object.prototype.hasOwnProperty.call(ROLE_STYLES, message.role);
  const style = hasOwnStyle ? ROLE_STYLES[message.role] : ROLE_STYLES.prompt;
  return (
    <div className={`border-l-2 ${style.border} ${style.bg} rounded-r pl-2 py-1`}>
      <div className={`text-[10px] font-semibold uppercase tracking-wider ${style.label}`}>
        {message.role}
      </div>
      <pre className="text-xs leading-relaxed whitespace-pre-wrap font-mono text-gray-300 max-h-60 overflow-y-auto">
        {message.content}
      </pre>
    </div>
  );
}
frontend/src/model/store.ts ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useCallback, useEffect, useMemo, useRef } from "react";
2
+ import type { DatasetInfo, QuestionData, Preset, FilterMode } from "./types";
3
+ import { api } from "./api";
4
+ import { parseHash, replaceRoute } from "../hashRouter";
5
+
6
/** Navigation position remembered separately for each dataset group. */
interface GroupIndices {
  /** Index of the question currently shown for the group. */
  questionIdx: number;
  /** Index of the sample currently shown for the group. */
  sampleIdx: number;
}
10
+
11
/** Filter values accepted from the `filter` hash parameter. */
const FILTER_MODES: readonly FilterMode[] = [
  "all",
  "improvements",
  "regressions",
  "both-correct",
  "both-wrong",
];

/** Parses a non-negative integer hash parameter; anything else becomes 0. */
function parseIndexParam(raw: string | null | undefined): number {
  const value = Number.parseInt(raw ?? "", 10);
  return Number.isNaN(value) || value < 0 ? 0 : value;
}

/** Validates the `filter` hash parameter; unknown values become "all". */
function parseFilterParam(raw: string | null | undefined): FilterMode {
  return FILTER_MODES.find((mode) => mode === raw) ?? "all";
}

/**
 * Central state hook for the model trace viewer.
 *
 * Responsibilities:
 * - tracks loaded datasets and groups them by `questionFingerprint`;
 * - keeps a question/sample position per group and exposes the position of
 *   the currently displayed group;
 * - mirrors the state into the URL hash (`repos`, `cols`, `pcols`, `q`, `s`,
 *   `filter`, `group`) and restores it from the hash on first mount;
 * - fetches question data for the active datasets of the current group.
 *
 * @returns The state values and the callbacks that mutate them.
 */
export function useAppState() {
  // Capture URL params at init BEFORE any effect can rewrite the hash.
  const initialParams = useRef(parseHash().params);
  // Initial q/s from the URL; applied to the first group that gets loaded.
  const initialIndices = useRef<GroupIndices>({
    questionIdx: parseIndexParam(initialParams.current.get("q")),
    sampleIdx: parseIndexParam(initialParams.current.get("s")),
  });
  // Keys (`${datasetId}:${questionIdx}`) already requested, so a re-run of the
  // fetch effect does not issue duplicate requests while one is in flight.
  const requestedQuestionKeys = useRef(new Set<string>());

  const [datasets, setDatasets] = useState<DatasetInfo[]>([]);
  const [presets, setPresets] = useState<Preset[]>([]);
  const [filter, setFilter] = useState<FilterMode>(() =>
    parseFilterParam(initialParams.current.get("filter")),
  );
  const [questionDataMap, setQuestionDataMap] = useState<Record<string, QuestionData>>({});
  const [loading, setLoading] = useState<Record<string, boolean>>({});
  const [error, setError] = useState<string | null>(null);

  // Per-group navigation indices
  const [groupIndices, setGroupIndices] = useState<Record<string, GroupIndices>>({});
  // Which group is currently displayed (fingerprint)
  const [currentGroupId, setCurrentGroupId] = useState<string | null>(null);

  // Load presets on mount. Best effort: a failure leaves the list empty.
  useEffect(() => {
    api.listPresets().then(setPresets).catch(() => {});
  }, []);

  // Derive groups from datasets by fingerprint
  const groups = useMemo(() => {
    const map: Record<string, DatasetInfo[]> = {};
    for (const ds of datasets) {
      const fp = ds.questionFingerprint;
      if (!map[fp]) map[fp] = [];
      map[fp].push(ds);
    }
    return map;
  }, [datasets]);

  const groupIds = useMemo(() => Object.keys(groups).sort(), [groups]);

  // Auto-set currentGroupId if not set or invalid
  useEffect(() => {
    if (currentGroupId && groups[currentGroupId]) return;
    // Pick first group that has active datasets, or first group overall
    const activeGroup = groupIds.find((gid) => groups[gid].some((d) => d.active));
    if (activeGroup) {
      setCurrentGroupId(activeGroup);
    } else if (groupIds.length > 0) {
      setCurrentGroupId(groupIds[0]);
    } else {
      setCurrentGroupId(null);
    }
  }, [groupIds, groups, currentGroupId]);

  // Active datasets = active datasets in current group
  const activeDatasets = useMemo(
    () => datasets.filter((d) => d.active && d.questionFingerprint === currentGroupId),
    [datasets, currentGroupId],
  );

  // Panel ordering: track display order of active dataset IDs
  const [panelOrder, setPanelOrder] = useState<string[]>([]);

  // Keep panelOrder in sync with activeDatasets: add new IDs at end, remove stale ones
  useEffect(() => {
    const activeIds = new Set(activeDatasets.map((d) => d.id));
    setPanelOrder((prev) => {
      const kept = prev.filter((id) => activeIds.has(id));
      const newIds = activeDatasets.map((d) => d.id).filter((id) => !prev.includes(id));
      const merged = [...kept, ...newIds];
      // Only update if changed to avoid unnecessary renders
      if (merged.length === prev.length && merged.every((id, i) => id === prev[i])) return prev;
      return merged;
    });
  }, [activeDatasets]);

  // Ordered active datasets according to panelOrder
  const orderedActiveDatasets = useMemo(() => {
    const map = new Map(activeDatasets.map((d) => [d.id, d]));
    return panelOrder.map((id) => map.get(id)).filter((d): d is DatasetInfo => d !== undefined);
  }, [activeDatasets, panelOrder]);

  // Moves panel `fromId` to the slot currently held by `toId`.
  const reorderPanels = useCallback((fromId: string, toId: string) => {
    if (fromId === toId) return;
    setPanelOrder((prev) => {
      const order = [...prev];
      const fromIdx = order.indexOf(fromId);
      const toIdx = order.indexOf(toId);
      if (fromIdx === -1 || toIdx === -1) return prev;
      order.splice(fromIdx, 1);
      order.splice(toIdx, 0, fromId);
      return order;
    });
  }, []);

  // Current group's indices
  const currentIndices = currentGroupId ? groupIndices[currentGroupId] : undefined;
  const questionIdx = currentIndices?.questionIdx ?? 0;
  const sampleIdx = currentIndices?.sampleIdx ?? 0;

  const setQuestionIdx = useCallback((val: number | ((prev: number) => number)) => {
    if (!currentGroupId) return;
    setGroupIndices((prev) => {
      const cur = prev[currentGroupId] ?? { questionIdx: 0, sampleIdx: 0 };
      const newQ = typeof val === "function" ? val(cur.questionIdx) : val;
      return { ...prev, [currentGroupId]: { ...cur, questionIdx: newQ } };
    });
  }, [currentGroupId]);

  const setSampleIdx = useCallback((val: number | ((prev: number) => number)) => {
    if (!currentGroupId) return;
    setGroupIndices((prev) => {
      const cur = prev[currentGroupId] ?? { questionIdx: 0, sampleIdx: 0 };
      const newS = typeof val === "function" ? val(cur.sampleIdx) : val;
      return { ...prev, [currentGroupId]: { ...cur, sampleIdx: newS } };
    });
  }, [currentGroupId]);

  // Update hash params when state changes
  useEffect(() => {
    const params = new URLSearchParams();
    const activeRepos = datasets.filter((d) => d.active);
    if (activeRepos.length > 0) {
      params.set("repos", activeRepos.map((d) => d.repo).join(","));
      params.set("cols", activeRepos.map((d) => d.column).join(","));
      params.set("pcols", activeRepos.map((d) => d.promptColumn || "formatted_prompt").join(","));
    }
    params.set("q", String(questionIdx));
    params.set("s", String(sampleIdx));
    if (filter !== "all") params.set("filter", filter);
    if (currentGroupId) params.set("group", currentGroupId);
    replaceRoute({ params });
  }, [datasets, questionIdx, sampleIdx, filter, currentGroupId]);

  // Fetch question data for active datasets in current group when question changes
  useEffect(() => {
    const requested = requestedQuestionKeys.current;
    for (const ds of activeDatasets) {
      const key = `${ds.id}:${questionIdx}`;
      if (requested.has(key)) continue;
      requested.add(key);
      api.getQuestion(ds.id, questionIdx)
        .then((data) => {
          setQuestionDataMap((prev) => ({ ...prev, [key]: data }));
        })
        .catch(() => {
          // Best effort: forget the key so a later effect run can retry.
          requested.delete(key);
        });
    }
  }, [questionIdx, activeDatasets]);

  // Loads a dataset, registers it as active and switches to its group.
  const addDataset = useCallback(async (
    repo: string, column?: string, split?: string, promptColumn?: string,
    presetId?: string, presetName?: string,
  ) => {
    setLoading((prev) => ({ ...prev, [repo]: true }));
    setError(null);
    try {
      const { question_fingerprint, ...rest } = await api.loadDataset(repo, column, split, promptColumn);
      const fp = question_fingerprint ?? "";
      const dsInfo: DatasetInfo = {
        ...rest,
        questionFingerprint: fp,
        active: true,
        presetId,
        presetName,
      };

      setDatasets((prev) => {
        if (prev.some((d) => d.id === dsInfo.id)) return prev;
        return [...prev, dsInfo];
      });

      // Initialize group indices if new group, or inherit existing
      setGroupIndices((prev) => {
        if (prev[fp]) return prev; // Group already exists, new repo inherits its indices
        // Only the very first group starts at the position taken from the URL.
        const isFirstGroup = Object.keys(prev).length === 0;
        const start: GroupIndices = isFirstGroup
          ? { ...initialIndices.current }
          : { questionIdx: 0, sampleIdx: 0 };
        return { ...prev, [fp]: start };
      });

      // Switch to the new dataset's group
      setCurrentGroupId(fp);
    } catch (e: unknown) {
      setError(e instanceof Error ? e.message : "Failed to load dataset");
    } finally {
      setLoading((prev) => ({ ...prev, [repo]: false }));
    }
  }, []);

  // Auto-load repos from the URL. Uses initialParams, captured before the
  // hash-sync effect above could overwrite the hash.
  useEffect(() => {
    const params = initialParams.current;
    const repoList = params.get("repos")?.split(",").filter(Boolean) || [];
    const colList = params.get("cols")?.split(",") || [];
    const pcolList = params.get("pcols")?.split(",") || [];
    for (let i = 0; i < repoList.length; i++) {
      addDataset(repoList[i], colList[i] || undefined, undefined, pcolList[i] || undefined);
    }
  }, [addDataset]);

  // Unloads a dataset on the backend (best effort) and drops it locally.
  const removeDataset = useCallback(async (id: string) => {
    await api.unloadDataset(id).catch(() => {});
    setDatasets((prev) => prev.filter((d) => d.id !== id));
  }, []);

  // Flips a dataset's `active` flag. Turning a dataset ON also switches the
  // view to that dataset's group. The group switch is issued next to the
  // state update rather than inside its updater, which must stay pure.
  const toggleDataset = useCallback((id: string) => {
    const target = datasets.find((d) => d.id === id);
    if (!target) return;
    setDatasets((prev) => prev.map((d) => (d.id === id ? { ...d, active: !d.active } : d)));
    if (!target.active) {
      setCurrentGroupId(target.questionFingerprint);
    }
  }, [datasets]);

  const updateDatasetPresetName = useCallback((dsId: string, name: string) => {
    setDatasets((prev) => prev.map((d) => (d.id === dsId ? { ...d, presetName: name } : d)));
  }, []);

  const clearDatasetPreset = useCallback((dsId: string) => {
    setDatasets((prev) =>
      prev.map((d) => (d.id === dsId ? { ...d, presetId: undefined, presetName: undefined } : d)),
    );
  }, []);

  // Infinity / 0 when no dataset is active.
  const maxQuestions = Math.min(...activeDatasets.map((d) => d.n_rows), Infinity);
  const maxSamples = Math.max(...activeDatasets.map((d) => d.n_samples), 0);

  // Returns the cached data of the current question for a dataset, if loaded.
  const getQuestionData = (dsId: string): QuestionData | undefined => {
    return questionDataMap[`${dsId}:${questionIdx}`];
  };

  return {
    datasets, presets, setPresets,
    questionIdx, setQuestionIdx,
    sampleIdx, setSampleIdx,
    filter, setFilter,
    loading, error, setError,
    activeDatasets, orderedActiveDatasets, maxQuestions, maxSamples,
    addDataset, removeDataset, toggleDataset,
    updateDatasetPresetName, clearDatasetPreset,
    getQuestionData, reorderPanels,
    // Group state
    groups, groupIds, currentGroupId, setCurrentGroupId,
  };
}
frontend/src/model/types.ts ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** A dataset loaded into the model trace viewer. */
export interface DatasetInfo {
  /** Identifier used for API calls, panel ordering and cache keys. */
  id: string;
  /** Repository the dataset was loaded from (serialized as `repos` in the URL). */
  repo: string;
  name: string;
  /** Selected column (serialized as `cols` in the URL). */
  column: string;
  columns: string[];
  split: string;
  /** Prompt column; the URL falls back to "formatted_prompt" when this is null. */
  promptColumn: string | null;
  /** Row count; the smallest value among active datasets bounds question navigation. */
  n_rows: number;
  /** Sample count; the largest value among active datasets bounds sample navigation. */
  n_samples: number;
  /** Whether the dataset is currently toggled on. */
  active: boolean;
  /** Datasets sharing the same fingerprint are shown together as one group. */
  questionFingerprint: string;
  presetId?: string;
  presetName?: string;
}
16
+
17
/** Per-response statistics and extracted text for one model trace. */
export interface TraceAnalysis {
  total_len: number;
  /** Length of the thinking section (displayed as a character count). */
  think_len: number;
  /** Length of the answer section (displayed as a character count). */
  answer_len: number;
  // NOTE(review): presumably counts of backtrack / restart markers found in
  // the trace — confirm against the backend that produces this payload.
  backtracks: number;
  restarts: number;
  think_text: string;
  answer_text: string;
}
26
+
27
/** Everything the viewer needs to display one question of a dataset. */
export interface QuestionData {
  question: string;
  prompt_text: string;
  // NOTE(review): responses, eval_correct, extractions and analyses look like
  // parallel per-sample arrays — confirm against the backend.
  responses: string[];
  eval_correct: boolean[];
  extractions: string[];
  metadata: Record<string, unknown>;
  analyses: TraceAnalysis[];
  n_samples: number;
  index: number;
}
38
+
39
/** Aggregate statistics for a dataset. */
export interface DatasetSummary {
  n_rows: number;
  n_samples: number;
  has_eval: boolean;
  /** Sample-level accuracy, when available. */
  sample_accuracy?: { correct: number; total: number; rate: number };
  /** Accuracy entries keyed by a number (presumably the k of pass@k — confirm). */
  pass_at?: Record<number, { correct: number; total: number; rate: number }>;
}
46
+
47
/** A saved dataset configuration that can be listed and loaded by name. */
export interface Preset {
  id: string;
  name: string;
  repo: string;
  column: string;
  /** Optional dataset split. */
  split?: string;
}
54
+
55
/** Question filter selectable in the viewer; "all" applies no filtering. */
export type FilterMode =
  | "all"
  | "improvements"
  | "regressions"
  | "both-correct"
  | "both-wrong";
frontend/src/model/utils/promptParser.ts ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** One chat message recovered from a serialized prompt. */
export interface ParsedMessage {
  role: string;
  content: string;
}

/** Llama BOS/EOS tokens (`<s>`, `</s>`) at either edge of a segment. */
const LLAMA_EDGE_SENTINELS = /^(?:\s*<\/?s>)+|(?:<\/?s>\s*)+$/g;

/**
 * Splits a serialized prompt into chat messages.
 *
 * Formats are tried in this order; the first one that matches wins:
 * 1. JSON array of `{role, content}` objects
 * 2. ChatML (`<|im_start|>role\ncontent<|im_end|>`)
 * 3. Generic tags (`<|system|>`, `<|user|>`, `<|assistant|>`)
 * 4. Llama style (`<<SYS>>`, `[INST]`, `[/INST]`)
 * 5. Plain labels at line start (`System:`, `User:`, `Assistant:`, `Human:`)
 *
 * @param text Raw prompt text.
 * @returns The parsed messages; `[]` for blank input; a single
 *   `{ role: "prompt" }` message when no format is recognized.
 */
export function parsePrompt(text: string): ParsedMessage[] {
  if (!text || !text.trim()) return [];

  // Try 1: JSON array of {role, content} objects
  try {
    const parsed: unknown = JSON.parse(text);
    if (Array.isArray(parsed) && parsed.length > 0 && parsed[0].role !== undefined) {
      return parsed.map((m: Record<string, unknown>) => ({
        role: String(m.role || "unknown"),
        content: String(m.content ?? ""),
      }));
    }
  } catch {
    // Not JSON (or not message-shaped): fall through to the text formats.
  }

  // Try 2: ChatML — <|im_start|>role\ncontent<|im_end|>
  if (text.includes("<|im_start|>")) {
    return text
      .split("<|im_start|>")
      // Whitespace before the first marker is not a message.
      .filter((part) => part.trim() !== "")
      .map((part) => {
        const nlIdx = part.indexOf("\n");
        const role = nlIdx > 0 ? part.slice(0, nlIdx).trim() : "unknown";
        const content = (nlIdx > 0 ? part.slice(nlIdx + 1) : part)
          .replace(/<\|im_end\|>/g, "")
          .trim();
        return { role, content };
      });
  }

  // Try 3: Generic chat template — <|system|>, <|user|>, <|assistant|>
  if (/<\|(system|user|assistant)\|>/.test(text)) {
    const tagRe = /<\|(system|user|assistant)\|>/g;
    const tags: { role: string; start: number; tagEnd: number }[] = [];
    let tag: RegExpExecArray | null;
    while ((tag = tagRe.exec(text)) !== null) {
      tags.push({ role: tag[1], start: tag.index, tagEnd: tag.index + tag[0].length });
    }
    return tags.map((pos, i) => {
      const end = i + 1 < tags.length ? tags[i + 1].start : text.length;
      return { role: pos.role, content: text.slice(pos.tagEnd, end).trim() };
    });
  }

  // Try 4: Llama-style — <<SYS>>, [INST], [/INST]
  if (text.includes("[INST]") || text.includes("<<SYS>>")) {
    const messages: ParsedMessage[] = [];
    const sysMatch = text.match(/<<SYS>>([\s\S]*?)<<\/SYS>>/);
    if (sysMatch) {
      messages.push({ role: "system", content: sysMatch[1].trim() });
    }

    const body = text.replace(/<<SYS>>[\s\S]*?<<\/SYS>>/g, "");
    const markerRe = /\[INST\]|\[\/INST\]/g;
    // The role comes from the marker that opens a segment, not from blind
    // alternation, so a leading `<s>` can no longer shift every role by one.
    let role = "user";
    let cursor = 0;
    const pushTurn = (raw: string): void => {
      const content = raw.replace(LLAMA_EDGE_SENTINELS, "").trim();
      if (content) messages.push({ role, content });
    };
    let marker: RegExpExecArray | null;
    while ((marker = markerRe.exec(body)) !== null) {
      pushTurn(body.slice(cursor, marker.index));
      role = marker[0] === "[INST]" ? "user" : "assistant";
      cursor = marker.index + marker[0].length;
    }
    pushTurn(body.slice(cursor));

    return messages.length > 0 ? messages : [{ role: "prompt", content: text }];
  }

  // Try 5: Plain labeled — "System:", "User:", "Assistant:", "Human:"
  if (/^(System|User|Assistant|Human):\s/m.test(text)) {
    const labelRe = /^(System|User|Assistant|Human):\s*/gm;
    const labels: { role: string; labelStart: number; contentStart: number }[] = [];
    let label: RegExpExecArray | null;
    while ((label = labelRe.exec(text)) !== null) {
      const name = label[1].toLowerCase();
      labels.push({
        role: name === "human" ? "user" : name,
        labelStart: label.index,
        contentStart: label.index + label[0].length,
      });
    }
    // A message runs up to the start of the next label. Using the recorded
    // label offset keeps an empty section from swallowing the next message.
    return labels.map((pos, i) => {
      const end = i + 1 < labels.length ? labels[i + 1].labelStart : text.length;
      return { role: pos.role, content: text.slice(pos.contentStart, end).trim() };
    });
  }

  // Fallback: single prompt block
  return [{ role: "prompt", content: text }];
}
frontend/src/model/utils/traceHighlight.ts ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** A run of trace text together with the CSS classes used to render it. */
export interface HighlightSegment {
  text: string;
  className: string;
}

/**
 * Line-prefix rules, checked in order against the lowercased, trimmed line.
 * The first rule with a matching prefix decides the class.
 */
const TRACE_PREFIX_RULES: readonly { className: string; prefixes: readonly string[] }[] = [
  { className: "text-yellow-400", prefixes: ["wait", "hmm", "but wait"] },
  { className: "text-cyan-400", prefixes: ["let me try", "let me reconsider", "let me think"] },
  {
    className: "text-green-400 font-bold",
    prefixes: ["so the answer", "so the expression", "therefore", "the final"],
  },
  {
    className: "text-red-400 font-bold",
    prefixes: ["i give up", "i can't find", "i'm stuck", "i'm sorry"],
  },
];

/** Picks the CSS classes for a single trace line. */
function classifyTraceLine(line: string): string {
  const normalized = line.toLowerCase().trim();
  const rule = TRACE_PREFIX_RULES.find((r) => r.prefixes.some((p) => normalized.startsWith(p)));
  if (rule) return rule.className;
  // Lines containing "=" and an arithmetic operator are rendered brighter.
  if (line.includes("=") && /[+\-*/]/.test(line)) return "text-gray-100";
  return "text-gray-300";
}

/**
 * Splits a trace into per-line segments with highlight classes.
 *
 * Lines are separated by unstyled "\n" segments, so concatenating every
 * segment's `text` reproduces the input. An empty trace yields a single
 * "(no response)" placeholder segment.
 */
export function highlightTrace(text: string): HighlightSegment[] {
  if (!text) return [{ text: "(no response)", className: "text-gray-500 italic" }];

  return text.split("\n").flatMap((line, idx, all): HighlightSegment[] => {
    const segment: HighlightSegment = { text: line, className: classifyTraceLine(line) };
    const isLast = idx === all.length - 1;
    return isLast ? [segment] : [segment, { text: "\n", className: "" }];
  });
}
frontend/src/visualizer/VisualizerApp.tsx ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { lazy, Suspense } from "react";
2
+ import { useHashRoute, navigateTo } from "../hashRouter";
3
+
4
// The model viewer is loaded on demand so it stays out of the initial bundle.
const ModelApp = lazy(() => import("../model/ModelApp"));

/** Identifiers of the visualizer tabs. */
type TabId = "model";

/** Tab bar entries; `activeClass` is applied to the currently selected tab. */
const TABS: { id: TabId; label: string; color: string; activeClass: string }[] = [
  {
    id: "model",
    label: "Model Trace",
    color: "blue",
    activeClass: "border-blue-500 text-blue-400",
  },
];

/** Tab ids accepted from the route; anything else falls back to "model". */
const VALID_TABS = new Set<string>(TABS.map(({ id }) => id));
13
+
14
/**
 * Visualizer page: a tab bar plus the lazily loaded viewer of the active tab.
 *
 * The active tab comes from the hash route; an unknown tab falls back to
 * "model".
 */
export default function VisualizerApp() {
  const { tab: requestedTab } = useHashRoute();
  const activeTab: TabId = VALID_TABS.has(requestedTab) ? (requestedTab as TabId) : "model";

  const inactiveTabClass = "border-transparent text-gray-500 hover:text-gray-300";
  const loadingFallback = (
    <div className="flex items-center justify-center h-full text-gray-500">
      Loading...
    </div>
  );

  return (
    <div className="h-full flex flex-col">
      {/* Visualizer tab bar */}
      <div className="flex items-center border-b border-gray-800 bg-gray-900/50 px-2 shrink-0">
        {TABS.map(({ id, label, activeClass }) => {
          const stateClass = activeTab === id ? activeClass : inactiveTabClass;
          return (
            <button
              key={id}
              onClick={() => navigateTo({ page: "viz", tab: id })}
              className={`px-5 py-2 text-sm font-medium border-b-2 transition-colors ${stateClass}`}
            >
              {label}
            </button>
          );
        })}
      </div>

      {/* Active visualizer */}
      <div className="flex-1 overflow-hidden">
        <Suspense fallback={loadingFallback}>
          {activeTab === "model" && (
            <div className="theme-model h-full">
              <ModelApp />
            </div>
          )}
        </Suspense>
      </div>
    </div>
  );
}
frontend/src/vite-env.d.ts ADDED
@@ -0,0 +1 @@
 
 
1
+ /// <reference types="vite/client" />
frontend/tailwind.config.js ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
// Tailwind scans index.html and every script/TSX file under src/ for class
// names; dark mode is switched by a `class` on an ancestor element.
/** @type {import('tailwindcss').Config} */
const config = {
  content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"],
  darkMode: "class",
  theme: { extend: {} },
  plugins: [],
};

export default config;
frontend/tsconfig.app.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "useDefineForClassFields": true,
5
+ "lib": ["ES2020", "DOM", "DOM.Iterable"],
6
+ "module": "ESNext",
7
+ "skipLibCheck": true,
8
+ "moduleResolution": "bundler",
9
+ "allowImportingTsExtensions": true,
10
+ "isolatedModules": true,
11
+ "moduleDetection": "force",
12
+ "noEmit": true,
13
+ "jsx": "react-jsx",
14
+ "strict": true,
15
+ "noUnusedLocals": false,
16
+ "noUnusedParameters": false,
17
+ "noFallthroughCasesInSwitch": true,
18
+ "forceConsistentCasingInFileNames": true
19
+ },
20
+ "include": ["src"]
21
+ }
frontend/tsconfig.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "files": [],
3
+ "references": [{ "path": "./tsconfig.app.json" }]
4
+ }
frontend/vite.config.ts ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { defineConfig } from "vite";
2
+ import react from "@vitejs/plugin-react";
3
+
4
// Dev-server proxy: requests under /api are forwarded to the local backend.
const apiProxy = {
  target: "http://localhost:8080",
  changeOrigin: true,
};

// Vite configuration: React plugin, dev server on port 5173, build into dist/.
export default defineConfig({
  plugins: [react()],
  server: {
    port: 5173,
    proxy: { "/api": apiProxy },
  },
  build: { outDir: "dist" },
});