studyOverflow committed on
Commit
73321fe
·
verified ·
1 Parent(s): cf6f941

feat: binary Y/N verdict; 2x2 layout; shared claim queue (no cross-annotator overlap)

Browse files
Files changed (1) hide show
  1. app.py +268 -212
app.py CHANGED
@@ -1,8 +1,17 @@
1
  """MBench-V annotation UI (Gradio Space).
2
 
3
- Streams videos from `studyOverflow/TempMemoryData` (no local copy); writes
4
- annotations back to the same dataset repo under `annotations/`, batched via
5
- `CommitScheduler`.
 
 
 
 
 
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
@@ -10,13 +19,14 @@ from __future__ import annotations
10
  import json
11
  import os
12
  import random
 
13
  import time
14
  import uuid
15
  from pathlib import Path
16
  from typing import Any
17
 
18
  import gradio as gr
19
- from huggingface_hub import CommitScheduler, hf_hub_download, hf_hub_url
20
 
21
  # ---------------------------------------------------------------------------
22
  # Config
@@ -25,8 +35,6 @@ from huggingface_hub import CommitScheduler, hf_hub_download, hf_hub_url
25
  DATASET_REPO = "studyOverflow/TempMemoryData"
26
  MERGED_JSON_PATH = "MBench-V/merged.json"
27
 
28
- # 8 fully-reorganized models (584 videos each). All 8 models have complete
29
- # data as of 2026-05-01.
30
  MODELS: list[str] = [
31
  "causal_forcing",
32
  "self_forcing",
@@ -44,7 +52,9 @@ ANN_DIR = Path("annotations_local")
44
  ANN_DIR.mkdir(exist_ok=True)
45
  PROCESS_ID = uuid.uuid4().hex[:8]
46
  ANN_FILE = ANN_DIR / f"ann_{PROCESS_ID}.jsonl"
47
- COMMIT_INTERVAL_MIN = 5
 
 
48
 
49
 
50
  # ---------------------------------------------------------------------------
@@ -53,10 +63,8 @@ COMMIT_INTERVAL_MIN = 5
53
 
54
  def _load_merged() -> list[dict[str, Any]]:
55
  local = hf_hub_download(
56
- repo_id=DATASET_REPO,
57
- filename=MERGED_JSON_PATH,
58
- repo_type="dataset",
59
- token=HF_TOKEN,
60
  )
61
  with open(local, encoding="utf-8") as f:
62
  return json.load(f)
@@ -65,42 +73,33 @@ def _load_merged() -> list[dict[str, Any]]:
65
  TASKS: list[dict[str, Any]] = _load_merged()
66
  TASK_BY_ID: dict[str, dict[str, Any]] = {t["task_id"]: t for t in TASKS}
67
 
 
 
 
68
 
69
- def _extract_prompt(task: dict[str, Any]) -> str:
70
- """Return the 5-segment prompt (level_3), joined with segment headers.
71
 
72
- All MBench-V models were actually fed the 5-segment (level_3) prompts at
73
- inference time, so this is what annotators should see. If level_3 is
74
- missing for some reason we fall back to the finest available level.
75
- """
 
 
 
 
 
 
76
  gp = task.get("generation_prompts") or {}
77
  prompts = gp.get("prompts") or {}
78
- # Prefer the canonical 5-segment split; otherwise fall back gracefully.
79
  for level in ("level_3", "level_4", "level_2", "level_1"):
80
  val = prompts.get(level)
81
  if isinstance(val, list) and val:
82
  n = len(val)
83
- parts = []
84
- for i, seg in enumerate(val, 1):
85
- parts.append(f"— Segment {i}/{n} —\n{seg}")
86
- return "\n\n".join(parts)
87
  if isinstance(val, str) and val:
88
  return val
89
  return "(no prompt found)"
90
 
91
 
92
- POOL: list[tuple[str, str]] = [(m, t["task_id"]) for m in MODELS for t in TASKS]
93
- print(f"[mbench-ann] loaded {len(TASKS)} tasks × {len(MODELS)} models = {len(POOL)} items")
94
-
95
-
96
- def _video_url(model: str, task_id: str) -> str:
97
- return hf_hub_url(
98
- DATASET_REPO,
99
- filename=f"MBench-V/{model}/videos/{task_id}.mp4",
100
- repo_type="dataset",
101
- )
102
-
103
-
104
  # ---------------------------------------------------------------------------
105
  # CommitScheduler
106
  # ---------------------------------------------------------------------------
@@ -108,51 +107,25 @@ def _video_url(model: str, task_id: str) -> str:
108
  scheduler: CommitScheduler | None = None
109
  if HF_TOKEN:
110
  scheduler = CommitScheduler(
111
- repo_id=DATASET_REPO,
112
- repo_type="dataset",
113
- folder_path=str(ANN_DIR),
114
- path_in_repo="annotations",
115
- every=COMMIT_INTERVAL_MIN,
116
- token=HF_TOKEN,
117
- private=False,
118
- squash_history=False,
119
  )
120
  print(f"[mbench-ann] CommitScheduler started (every {COMMIT_INTERVAL_MIN} min)")
121
  else:
122
  print("[mbench-ann] WARNING: HF_TOKEN not set — annotations stay local only")
123
 
124
 
125
- def _append_annotation(record: dict[str, Any]) -> None:
126
- line = json.dumps(record, ensure_ascii=False)
127
- if scheduler is not None:
128
- with scheduler.lock:
129
- with ANN_FILE.open("a", encoding="utf-8") as f:
130
- f.write(line + "\n")
131
- else:
132
- with ANN_FILE.open("a", encoding="utf-8") as f:
133
- f.write(line + "\n")
134
- # Update in-memory mirror so progress stats react immediately (the
135
- # committed file only arrives on the dataset every COMMIT_INTERVAL_MIN).
136
- HISTORICAL_ANNOTATIONS.append(record)
137
-
138
-
139
  # ---------------------------------------------------------------------------
140
- # Load historical annotations (for dedup + progress stats)
141
  # ---------------------------------------------------------------------------
142
 
143
  def _fetch_remote_annotations() -> list[dict[str, Any]]:
144
- """Download and parse every .jsonl file under `annotations/` on the dataset repo.
145
-
146
- Returns a list of records. Silently returns [] if the folder does not exist
147
- or any download fails — annotation UX should never be blocked by this.
148
- """
149
- from huggingface_hub import HfApi
150
  records: list[dict[str, Any]] = []
151
  try:
152
  api = HfApi(token=HF_TOKEN)
153
- files = api.list_repo_files(
154
- repo_id=DATASET_REPO, repo_type="dataset",
155
- )
156
  except Exception as e:
157
  print(f"[mbench-ann] list_repo_files failed: {e}")
158
  return records
@@ -182,126 +155,206 @@ HISTORICAL_ANNOTATIONS: list[dict[str, Any]] = _fetch_remote_annotations()
182
  print(f"[mbench-ann] loaded {len(HISTORICAL_ANNOTATIONS)} historical annotation records")
183
 
184
 
185
- def _global_stats() -> tuple[int, int]:
186
- """(total_records, unique_(model,task_id)_pairs_covered)."""
187
- seen: set[tuple[str, str]] = set()
188
- for r in HISTORICAL_ANNOTATIONS:
189
- if "model" in r and "task_id" in r:
190
- seen.add((r["model"], r["task_id"]))
191
- return len(HISTORICAL_ANNOTATIONS), len(seen)
192
 
 
193
 
194
- def _annotator_seen(annotator: str) -> set[tuple[str, str]]:
195
- """`(model, task_id)` pairs already annotated by this annotator."""
196
- annotator_l = annotator.strip().lower()
197
- seen: set[tuple[str, str]] = set()
198
- for r in HISTORICAL_ANNOTATIONS:
199
- if (r.get("annotator") or "").strip().lower() == annotator_l:
200
- seen.add((r.get("model", ""), r.get("task_id", "")))
201
- return seen
202
 
 
 
203
 
204
- def _global_stats_md() -> str:
205
- total, unique = _global_stats()
206
- pool_sz = len(POOL)
207
- pct = (unique / pool_sz * 100) if pool_sz else 0
208
- return (
209
- f"**Dataset progress**: {total} total annotations submitted • "
210
- f"{unique} / {pool_sz} unique (model, task_id) pairs covered ({pct:.1f}%)"
211
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
  # ---------------------------------------------------------------------------
216
  # UI helpers
217
  # ---------------------------------------------------------------------------
218
 
219
- def _format_meta(model: str, task: dict[str, Any], idx: int, total: int) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  lines = [
221
- f"**Progress**: {idx + 1} / {total}",
222
  f"**Model**: `{model}`",
223
  f"**task_id**: `{task['task_id']}`",
224
- f"**category**: `{task.get('category', '?')}` • **subcategory**: `{task.get('subcategory', '?')}`",
225
- f"**source_task**: `{task.get('source_task', '?')}`",
226
  ]
227
- if task.get("task_type"):
228
- lines.append(f"**task_type**: `{task['task_type']}`")
229
  return "\n\n".join(lines)
230
 
231
 
232
- def _load_item(pool_order: list[int], idx: int) -> tuple[str, str, str]:
233
- if idx < 0 or idx >= len(pool_order):
234
- return "<div style='padding:24px;color:#888'>All done — no more items.</div>", "**All done!** No more items.", ""
235
- model, task_id = POOL[pool_order[idx]]
236
- task = TASK_BY_ID[task_id]
237
- url = _video_url(model, task_id)
238
- # Render a native <video> so the browser streams directly from HF CDN,
239
- # bypassing gradio's server-side SSRF-protected download (which blocks
240
- # the xet-bridge hostnames).
241
- video_html = (
242
  f'<video src="{url}" controls autoplay loop muted playsinline '
243
- f'style="width:100%;max-height:520px;background:#000;border-radius:8px;">'
244
  f'Your browser does not support HTML5 video.</video>'
245
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  return (
247
- video_html,
248
- _format_meta(model, task, idx, len(pool_order)),
249
  _extract_prompt(task),
 
250
  )
251
 
252
 
253
  # ---------------------------------------------------------------------------
254
- # Gradio callbacks — all return plain Python values (no gr.update mix)
255
  # ---------------------------------------------------------------------------
256
 
257
  def start_session(annotator: str, state: dict):
258
  annotator = (annotator or "").strip()
259
  if not annotator:
260
  return (
261
- state, "", "⚠️ Please enter a name first.", "",
262
- "⚠️ Please enter a name first.", _global_stats_md(),
263
  )
264
- # Filter out (model, task_id) already annotated by this annotator
265
- seen = _annotator_seen(annotator)
266
- order: list[int] = [i for i, (m, t) in enumerate(POOL) if (m, t) not in seen]
267
- rng = random.Random(f"{annotator}-{int(time.time())}")
 
268
  rng.shuffle(order)
269
- state = {"annotator": annotator, "order": order, "idx": 0}
270
- if not order:
271
- status = (
272
- f"🎉 Welcome back `{annotator}` — you have already annotated every item. Nothing left to do!"
273
- )
274
- return (
275
- state,
276
- "<div style='padding:24px;color:#888;text-align:center'>All done!</div>",
277
- "**All done.**",
278
- "",
279
- status,
280
- _global_stats_md(),
281
- )
282
- video_html, meta, prompt = _load_item(order, 0)
283
- skipped = len(POOL) - len(order)
284
- status = (
285
- f"✅ Logged in as `{annotator}` — {len(order)} items to annotate"
286
- + (f" (skipped {skipped} already done)." if skipped else ".")
287
- )
288
- return state, video_html, meta, prompt, status, _global_stats_md()
289
 
290
 
291
- def submit_and_next(state: dict, score: float, note: str):
292
- if not state or "order" not in state:
 
293
  return (
294
- state, "", "⚠️ Please log in first.", "", 3, "",
295
- "⚠️ Not logged in.", _global_stats_md(),
296
  )
297
- order = state["order"]
298
- idx = state["idx"]
299
- if idx >= len(order):
300
  return (
301
- state, "", "**All done!**", "", 3, "",
302
- "No more items.", _global_stats_md(),
303
  )
304
- model, task_id = POOL[order[idx]]
305
  record = {
306
  "timestamp": time.time(),
307
  "timestamp_iso": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
@@ -309,111 +362,114 @@ def submit_and_next(state: dict, score: float, note: str):
309
  "process_id": PROCESS_ID,
310
  "model": model,
311
  "task_id": task_id,
312
- "score": int(score),
 
313
  "note": (note or "").strip(),
314
  }
315
  _append_annotation(record)
316
- state["idx"] = idx + 1
317
- video_html, meta, prompt = _load_item(state["order"], state["idx"])
 
 
 
 
318
  return (
319
- state, video_html, meta, prompt, 3, "",
320
- f"✅ Submitted ({state['idx']}). Next →", _global_stats_md(),
 
 
 
321
  )
322
 
323
 
324
  def skip_and_next(state: dict):
325
- if not state or "order" not in state:
 
326
  return (
327
- state, "", "⚠️ Please log in first.", "", 3, "",
328
- "⚠️ Not logged in.", _global_stats_md(),
329
  )
330
- state["idx"] = state["idx"] + 1
331
- video_html, meta, prompt = _load_item(state["order"], state["idx"])
 
 
 
 
 
332
  return (
333
- state, video_html, meta, prompt, 3, "",
334
- f"⏭️ Skipped. Position: {state['idx']}", _global_stats_md(),
 
 
335
  )
336
 
337
 
338
  # ---------------------------------------------------------------------------
339
- # Gradio UI — single page (no visibility toggling)
340
  # ---------------------------------------------------------------------------
341
 
342
- with gr.Blocks(title="MBench-V Annotation", theme=gr.themes.Soft()) as demo:
 
 
 
 
 
343
  gr.Markdown(
344
  """
345
- # 🎬 MBench-V Annotation
346
-
347
- 1. Enter a short name (any string — it tags your submissions).
348
- 2. Click **Start** — a video will appear below.
349
- 3. Give a score (1–5, 5 = best) and optional note; click **Submit & Next**.
350
- 4. Submissions auto-sync to the dataset repo every 5 minutes.
351
 
352
- _Tip_: items you've already annotated are automatically skipped.
 
353
  """
354
  )
355
 
356
- stats_md = gr.Markdown(_global_stats_md())
357
 
358
  state = gr.State()
359
 
360
  with gr.Row():
361
  annotator_in = gr.Textbox(
362
- label="Annotator name",
363
- placeholder="e.g. alice",
364
- scale=4,
365
  )
366
  login_btn = gr.Button("Start", variant="primary", scale=1)
367
 
368
  status_md = gr.Markdown("_Not started yet._")
369
 
370
- with gr.Row():
 
371
  with gr.Column(scale=3):
372
- video = gr.HTML(
373
- value="<div style='padding:24px;color:#888;text-align:center'>"
374
- "Enter your name above and click <b>Start</b> to load the first video."
375
- "</div>",
376
- label="Generated video",
377
- )
378
  with gr.Column(scale=2):
379
- meta_md = gr.Markdown()
380
  prompt_tb = gr.Textbox(
381
  label="Generation prompt (5 segments)",
382
- lines=20,
383
- max_lines=40,
384
- interactive=False,
385
  )
386
- with gr.Column(scale=1):
387
- score = gr.Slider(1, 5, value=3, step=1, label="Score")
388
- note = gr.Textbox(label="Note (optional)", lines=4)
389
- submit_btn = gr.Button("✅ Submit & Next", variant="primary")
390
- skip_btn = gr.Button("⏭️ Skip")
391
-
392
- login_btn.click(
393
- start_session,
394
- inputs=[annotator_in, state],
395
- outputs=[state, video, meta_md, prompt_tb, status_md, stats_md],
396
- api_name=False,
397
- )
398
- annotator_in.submit(
399
- start_session,
400
- inputs=[annotator_in, state],
401
- outputs=[state, video, meta_md, prompt_tb, status_md, stats_md],
402
- api_name=False,
403
- )
404
- submit_btn.click(
405
- submit_and_next,
406
- inputs=[state, score, note],
407
- outputs=[state, video, meta_md, prompt_tb, score, note, status_md, stats_md],
408
- api_name=False,
409
- )
410
- skip_btn.click(
411
- skip_and_next,
412
- inputs=[state],
413
- outputs=[state, video, meta_md, prompt_tb, score, note, status_md, stats_md],
414
- api_name=False,
415
- )
416
 
417
 
418
  if __name__ == "__main__":
419
- demo.queue(default_concurrency_limit=8).launch()
 
1
  """MBench-V annotation UI (Gradio Space).
2
 
3
+ Streams videos from `studyOverflow/TempMemoryData`; writes annotations back to
4
+ `annotations/` on the same repo (batched via `CommitScheduler`, every 5 min).
5
+
6
+ Key design:
7
+ - Binary task: "Does this video exhibit a memory issue?" (Yes / No)
8
+ - No-overlap work queue: each (model, task_id) is claimed by at most one
9
+ annotator at a time. Submitted items stay claimed forever; pending
10
+ claims expire after PENDING_TIMEOUT_MIN so a disconnected user doesn't
11
+ hold a task hostage.
12
+ - Per-session randomized full-pool shuffle, each annotator skips over
13
+ already-claimed items until they find their next fresh one. So the
14
+ order they see is fully randomized and there is zero assignment logic.
15
  """
16
 
17
  from __future__ import annotations
 
19
  import json
20
  import os
21
  import random
22
+ import threading
23
  import time
24
  import uuid
25
  from pathlib import Path
26
  from typing import Any
27
 
28
  import gradio as gr
29
+ from huggingface_hub import CommitScheduler, HfApi, hf_hub_download, hf_hub_url
30
 
31
  # ---------------------------------------------------------------------------
32
  # Config
 
35
  DATASET_REPO = "studyOverflow/TempMemoryData"
36
  MERGED_JSON_PATH = "MBench-V/merged.json"
37
 
 
 
38
  MODELS: list[str] = [
39
  "causal_forcing",
40
  "self_forcing",
 
52
  ANN_DIR.mkdir(exist_ok=True)
53
  PROCESS_ID = uuid.uuid4().hex[:8]
54
  ANN_FILE = ANN_DIR / f"ann_{PROCESS_ID}.jsonl"
55
+
56
+ COMMIT_INTERVAL_MIN = 5 # CommitScheduler push period
57
+ PENDING_TIMEOUT_SEC = 30 * 60 # a pending (claimed but unsubmitted) item expires after this
58
 
59
 
60
  # ---------------------------------------------------------------------------
 
63
 
64
def _load_merged() -> list[dict[str, Any]]:
    """Download `MBench-V/merged.json` from the dataset repo and parse it.

    Uses the hub cache, so repeated process restarts don't re-download.
    """
    cached = hf_hub_download(
        repo_id=DATASET_REPO,
        filename=MERGED_JSON_PATH,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    return json.loads(Path(cached).read_text(encoding="utf-8"))
 
73
TASKS: list[dict[str, Any]] = _load_merged()
# Index tasks by their unique id for O(1) lookup when rendering an item.
TASK_BY_ID: dict[str, dict[str, Any]] = {task["task_id"]: task for task in TASKS}

# Full annotation pool: the cross product of every model with every task.
POOL: list[tuple[str, str]] = [
    (model, task["task_id"]) for model in MODELS for task in TASKS
]
POOL_SET: set[tuple[str, str]] = set(POOL)
print(f"[mbench-ann] loaded {len(TASKS)} tasks × {len(MODELS)} models = {len(POOL)} items")
79
 
 
 
80
 
81
def _video_url(model: str, task_id: str) -> str:
    """Resolve the CDN URL for one generated video; nothing is downloaded."""
    video_path = f"MBench-V/{model}/videos/{task_id}.mp4"
    return hf_hub_url(DATASET_REPO, filename=video_path, repo_type="dataset")
87
+
88
+
89
+ def _extract_prompt(task: dict[str, Any]) -> str:
90
+ """Always return the 5-segment (level_3) prompt, joined with segment headers."""
91
  gp = task.get("generation_prompts") or {}
92
  prompts = gp.get("prompts") or {}
 
93
  for level in ("level_3", "level_4", "level_2", "level_1"):
94
  val = prompts.get(level)
95
  if isinstance(val, list) and val:
96
  n = len(val)
97
+ return "\n\n".join(f"— Segment {i}/{n} —\n{seg}" for i, seg in enumerate(val, 1))
 
 
 
98
  if isinstance(val, str) and val:
99
  return val
100
  return "(no prompt found)"
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  # ---------------------------------------------------------------------------
104
  # CommitScheduler
105
  # ---------------------------------------------------------------------------
 
107
  scheduler: CommitScheduler | None = None
108
  if HF_TOKEN:
109
  scheduler = CommitScheduler(
110
+ repo_id=DATASET_REPO, repo_type="dataset",
111
+ folder_path=str(ANN_DIR), path_in_repo="annotations",
112
+ every=COMMIT_INTERVAL_MIN, token=HF_TOKEN,
113
+ private=False, squash_history=False,
 
 
 
 
114
  )
115
  print(f"[mbench-ann] CommitScheduler started (every {COMMIT_INTERVAL_MIN} min)")
116
  else:
117
  print("[mbench-ann] WARNING: HF_TOKEN not set — annotations stay local only")
118
 
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # ---------------------------------------------------------------------------
121
+ # Historical annotations bootstrap the "submitted" set
122
  # ---------------------------------------------------------------------------
123
 
124
  def _fetch_remote_annotations() -> list[dict[str, Any]]:
 
 
 
 
 
 
125
  records: list[dict[str, Any]] = []
126
  try:
127
  api = HfApi(token=HF_TOKEN)
128
+ files = api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
 
 
129
  except Exception as e:
130
  print(f"[mbench-ann] list_repo_files failed: {e}")
131
  return records
 
155
  print(f"[mbench-ann] loaded {len(HISTORICAL_ANNOTATIONS)} historical annotation records")
156
 
157
 
158
# ---------------------------------------------------------------------------
# Shared work-queue state
# ---------------------------------------------------------------------------
# STATE_LOCK guards SUBMITTED and PENDING; both are mutated on every login,
# skip, submit, and next-item fetch.

STATE_LOCK = threading.Lock()

# Permanently consumed (model, task_id) keys, seeded from the historical
# records and restricted to keys that actually exist in the current pool.
SUBMITTED: set[tuple[str, str]] = POOL_SET & {
    (r["model"], r["task_id"])
    for r in HISTORICAL_ANNOTATIONS
    if "model" in r and "task_id" in r
}

# Claimed-but-not-yet-submitted items: {(model, task_id): (annotator, claim_ts)}
PENDING: dict[tuple[str, str], tuple[str, float]] = {}

print(f"[mbench-ann] work-queue bootstrap: submitted={len(SUBMITTED)}, remaining={len(POOL) - len(SUBMITTED)}")
177
+
178
+
179
def _reap_expired_pending() -> None:
    """Evict PENDING claims older than PENDING_TIMEOUT_SEC.

    The caller must already hold STATE_LOCK.
    """
    cutoff = time.time() - PENDING_TIMEOUT_SEC
    stale = [key for key, (_owner, claimed_at) in PENDING.items() if claimed_at < cutoff]
    for key in stale:
        del PENDING[key]
    if stale:
        print(f"[mbench-ann] reaped {len(stale)} expired pending items")
187
+
188
+
189
def _claim_next(annotator: str, order: list[int], idx_start: int) -> tuple[int, tuple[str, str] | None]:
    """Claim the first free pool item at or after `idx_start` within `order`.

    Skips anything already submitted or currently pending. Returns
    (index, (model, task_id)) on success, or (len(order), None) when nothing
    is left. The scan-and-claim is atomic under STATE_LOCK.
    """
    with STATE_LOCK:
        _reap_expired_pending()
        for pos in range(idx_start, len(order)):
            candidate = POOL[order[pos]]
            if candidate in SUBMITTED or candidate in PENDING:
                continue
            PENDING[candidate] = (annotator, time.time())
            return pos, candidate
        return len(order), None
207
+
208
+
209
def _release_pending(annotator: str, mt: tuple[str, str] | None, *, reason: str) -> None:
    """Return a claim to the pool; only its owning annotator may release it.

    `reason` exists for call-site readability (e.g. reason="skip") and is
    currently unused. No-op when `mt` is None or owned by someone else.
    """
    if mt is None:
        return
    with STATE_LOCK:
        owner = PENDING.get(mt)
        if owner is not None and owner[0] == annotator:
            del PENDING[mt]
217
+
218
+
219
def _record_submit(annotator: str, mt: tuple[str, str]) -> None:
    """Permanently consume a claim: move it from PENDING into SUBMITTED."""
    with STATE_LOCK:
        SUBMITTED.add(mt)
        PENDING.pop(mt, None)
224
 
225
 
226
def _append_annotation(record: dict[str, Any]) -> None:
    """Append one record as a JSONL line to ANN_FILE and mirror it in memory.

    When a CommitScheduler is active, the file append runs under its lock so
    a background push never observes a half-written line. The previous
    version duplicated the append in both branches; a null context manager
    collapses them into one code path.
    """
    from contextlib import nullcontext

    line = json.dumps(record, ensure_ascii=False)
    guard = scheduler.lock if scheduler is not None else nullcontext()
    with guard:
        with ANN_FILE.open("a", encoding="utf-8") as f:
            f.write(line + "\n")
    # Mirror immediately so progress stats react before the next scheduled push.
    HISTORICAL_ANNOTATIONS.append(record)
236
+
237
 
238
  # ---------------------------------------------------------------------------
239
  # UI helpers
240
  # ---------------------------------------------------------------------------
241
 
242
def _stats_md() -> str:
    """Render the global progress line (submitted / pending / remaining)."""
    with STATE_LOCK:
        _reap_expired_pending()
        done, in_flight = len(SUBMITTED), len(PENDING)
    total = len(POOL)
    pct = 100.0 * done / total if total else 0.0
    remaining = total - done - in_flight
    return (
        f"**Global progress**: {done} / {total} submitted ({pct:.1f}%)"
        f" • {in_flight} currently being annotated by someone"
        f" • {remaining} left"
    )
254
+
255
+
256
+ def _meta_md(model: str, task: dict[str, Any], state: dict) -> str:
257
+ done_by_me = state.get("submitted_count", 0)
258
+ claimed_n = state.get("claimed_n", 0)
259
  lines = [
260
+ f"**Your submissions this session**: {done_by_me} (claimed {claimed_n} so far)",
261
  f"**Model**: `{model}`",
262
  f"**task_id**: `{task['task_id']}`",
 
 
263
  ]
 
 
264
  return "\n\n".join(lines)
265
 
266
 
267
+ def _render_video_html(url: str) -> str:
268
+ return (
 
 
 
 
 
 
 
 
269
  f'<video src="{url}" controls autoplay loop muted playsinline '
270
+ f'style="width:100%;height:520px;background:#000;border-radius:8px;object-fit:contain;">'
271
  f'Your browser does not support HTML5 video.</video>'
272
  )
273
+
274
+
275
# Static HTML panels shown in place of the <video> element before login and
# once the pool is exhausted. Sized to match _render_video_html (520px).
PLACEHOLDER_VIDEO = (
    "<div style='height:520px;display:flex;align-items:center;"
    "justify-content:center;color:#888;background:#111;border-radius:8px;'>"
    "Enter your name and click <b>Start</b> to load a video.</div>"
)

ALL_DONE_VIDEO = (
    "<div style='height:520px;display:flex;align-items:center;"
    "justify-content:center;color:#4a7;background:#111;border-radius:8px;"
    "font-size:18px;'>🎉 All items have been annotated. Thank you!</div>"
)
288
+
289
+
290
def _next_item_for(state: dict) -> tuple[str, str, str, dict]:
    """Claim the next unconsumed item for this session and render it.

    Advances `state["idx"]` to the claimed position (or past the end when
    the pool is exhausted) and records the claim in `state["current"]`.

    Returns (video_html, meta_md, prompt_text, updated_state).
    """
    new_idx, claim = _claim_next(
        state["annotator"], state["order"], state.get("idx", 0)
    )
    state["idx"] = new_idx

    if claim is None:
        # Pool exhausted — show the terminal panel.
        state["current"] = None
        return ALL_DONE_VIDEO, "**All done.** Nothing left in the pool.", "", state

    state["current"] = claim
    state["claimed_n"] = state.get("claimed_n", 0) + 1

    model, task_id = claim
    task = TASK_BY_ID[task_id]
    video_html = _render_video_html(_video_url(model, task_id))
    return video_html, _meta_md(model, task, state), _extract_prompt(task), state
317
 
318
 
319
  # ---------------------------------------------------------------------------
320
+ # Gradio callbacks
321
  # ---------------------------------------------------------------------------
322
 
323
def start_session(annotator: str, state: dict):
    """Log an annotator in: build a fresh shuffled order and claim item one.

    Returns the tuple wired to
    [state, video, meta_md, prompt_tb, status_md, stats_md].

    Fix: the previous version's comment promised to release claims held by an
    older session of this user, but no code did it — a user who reloaded the
    page had to wait out PENDING_TIMEOUT_SEC on their own stale claim. We now
    actually drop this annotator's in-process claims on (re-)login.
    """
    annotator = (annotator or "").strip()
    if not annotator:
        return (
            state, PLACEHOLDER_VIDEO, "", "",
            "⚠️ Please enter a name first.", _stats_md(),
        )

    # Release any claim an older session of this user still holds in-memory.
    # (Cross-process sessions of the same user are still invisible to us.)
    with STATE_LOCK:
        stale = [mt for mt, (owner, _ts) in PENDING.items() if owner == annotator]
        for mt in stale:
            del PENDING[mt]

    # Per-session randomized order over the full pool; the uuid component
    # makes two logins by the same user within one second differ too.
    order = list(range(len(POOL)))
    rng = random.Random(f"{annotator}-{int(time.time())}-{uuid.uuid4().hex}")
    rng.shuffle(order)

    state = {"annotator": annotator, "order": order, "idx": 0,
             "current": None, "submitted_count": 0, "claimed_n": 0}
    video_html, meta, prompt, state = _next_item_for(state)
    status = f"Welcome `{annotator}`. Good luck!"
    return state, video_html, meta, prompt, status, _stats_md()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
341
 
342
 
343
def submit_and_next(state: dict, verdict: str, note: str):
    """Persist the current claim's verdict, then advance to a fresh item.

    Returns the tuple wired to
    [state, video, meta_md, prompt_tb, verdict, note, status_md, stats_md];
    verdict resets to "No" and the note clears after every submission.
    """
    if not state or "annotator" not in state:
        return (
            state, PLACEHOLDER_VIDEO, "", "", "No", "",
            "⚠️ Please log in first.", _stats_md(),
        )

    current = state.get("current")
    if current is None:
        # Nothing claimed right now — nothing to record.
        return (
            state, ALL_DONE_VIDEO, "**All done.**", "", "No", "",
            "No active item to submit.", _stats_md(),
        )

    model, task_id = current
    _append_annotation({
        "timestamp": time.time(),
        "timestamp_iso": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()),
        "annotator": state["annotator"],
        "process_id": PROCESS_ID,
        "model": model,
        "task_id": task_id,
        "memory_issue": verdict == "Yes",  # boolean
        "verdict": verdict,                # "Yes" / "No" raw
        "note": (note or "").strip(),
    })
    _record_submit(state["annotator"], current)

    state["submitted_count"] = state.get("submitted_count", 0) + 1
    state["idx"] = state["idx"] + 1
    state["current"] = None

    video_html, meta, prompt, state = _next_item_for(state)
    return (
        state, video_html, meta, prompt,
        "No",  # reset verdict
        "",    # reset note
        f"✅ Submitted ({state['submitted_count']}) → next",
        _stats_md(),
    )
383
 
384
 
385
def skip_and_next(state: dict):
    """Return the current claim to the pool (if any) and advance.

    Output tuple mirrors submit_and_next so both buttons share one wiring:
    [state, video, meta_md, prompt_tb, verdict, note, status_md, stats_md].
    """
    if not state or "annotator" not in state:
        return (
            state, PLACEHOLDER_VIDEO, "", "", "No", "",
            "⚠️ Please log in first.", _stats_md(),
        )

    current = state.get("current")
    if current is not None:
        _release_pending(state["annotator"], current, reason="skip")
    state["idx"] = state.get("idx", 0) + 1
    state["current"] = None

    video_html, meta, prompt, state = _next_item_for(state)
    return (
        state, video_html, meta, prompt,
        "No", "",
        f"⏭️ Skipped. Your submissions: {state.get('submitted_count', 0)}",
        _stats_md(),
    )
405
 
406
 
407
  # ---------------------------------------------------------------------------
408
+ # UI
409
  # ---------------------------------------------------------------------------
410
 
411
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------

# Pin the prompt textarea height so the right column matches the video panel.
CUSTOM_CSS = """
#prompt_box textarea { height: 480px !important; overflow-y: auto !important; }
"""

with gr.Blocks(title="MBench-V Annotation", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
    gr.Markdown(
        """
        # 🎬 MBench-V Annotation — *Does this video exhibit a memory issue?*

        Watch the video (left) against its intended 5-segment prompt (right) and
        answer **Yes** or **No**.
        """
    )

    stats_md = gr.Markdown(_stats_md())
    state = gr.State()

    # --- Login bar ---
    with gr.Row():
        annotator_in = gr.Textbox(
            label="Annotator name", placeholder="e.g. alice", scale=4, autofocus=True,
        )
        login_btn = gr.Button("Start", variant="primary", scale=1)
    status_md = gr.Markdown("_Not started yet._")

    # --- Top row: video beside prompt ---
    with gr.Row(equal_height=True):
        with gr.Column(scale=3):
            video = gr.HTML(value=PLACEHOLDER_VIDEO, label="Generated video")
        with gr.Column(scale=2):
            prompt_tb = gr.Textbox(
                label="Generation prompt (5 segments)",
                lines=22, max_lines=22,
                interactive=False, show_copy_button=True,
                elem_id="prompt_box",
            )

    # --- Bottom row: metadata beside the verdict controls ---
    with gr.Row():
        with gr.Column(scale=3):
            meta_md = gr.Markdown("_No task loaded yet._")
        with gr.Column(scale=2):
            verdict = gr.Radio(
                choices=["No", "Yes"], value="No",
                label="Does this video exhibit a memory issue?",
            )
            note = gr.Textbox(label="Note (optional)", lines=2)
            with gr.Row():
                submit_btn = gr.Button("✅ Submit & Next", variant="primary")
                skip_btn = gr.Button("⏭️ Skip")

    # --- Wiring: login refreshes the view; submit/skip also reset the form ---
    login_outputs = [state, video, meta_md, prompt_tb, status_md, stats_md]
    step_outputs = [state, video, meta_md, prompt_tb, verdict, note, status_md, stats_md]

    login_btn.click(start_session, inputs=[annotator_in, state], outputs=login_outputs, api_name=False)
    annotator_in.submit(start_session, inputs=[annotator_in, state], outputs=login_outputs, api_name=False)
    submit_btn.click(submit_and_next, inputs=[state, verdict, note], outputs=step_outputs, api_name=False)
    skip_btn.click(skip_and_next, inputs=[state], outputs=step_outputs, api_name=False)


if __name__ == "__main__":
    demo.queue(default_concurrency_limit=16).launch()