databoysu committed on
Commit
e5add8c
·
1 Parent(s): b45a9cb
Files changed (1) hide show
  1. vision_ui.py +30 -36
vision_ui.py CHANGED
@@ -31,7 +31,7 @@ HARD_CHOICES = [t.get("name") for t in TASKS_BY_DIFFICULTY.get("hard", []) if t.
31
  ROOT_DIR = Path(__file__).resolve().parent
32
  INFERENCE_PATH = ROOT_DIR / "inference.py"
33
  BACKEND_HOST = "127.0.0.1"
34
- BACKEND_PORT = 8000
35
  GRADIO_HOST = "0.0.0.0"
36
  GRADIO_PORT = 7860
37
 
@@ -287,7 +287,6 @@ def _build_env(
287
 
288
 
289
  def sync_tasks(selected, grid_name):
290
- # Depending on which grid was clicked, clear the others and fetch code
291
  if grid_name == "easy":
292
  easy_val = selected
293
  med_val = None
@@ -306,6 +305,7 @@ def sync_tasks(selected, grid_name):
306
 
307
  code_content = _code_from_task_name(selected)
308
  hud_content = _update_hud_badge(selected, diff)
 
309
 
310
  return (
311
  selected,
@@ -313,27 +313,25 @@ def sync_tasks(selected, grid_name):
313
  gr.update(value=med_val),
314
  gr.update(value=hard_val),
315
  hud_content,
 
316
  code_content
317
  )
318
 
319
  def validate_and_start(token):
320
  if not token or not token.strip():
321
- # Empty token: halt execution, render alert, push an error state early
322
  return (
323
  gr.update(elem_classes=["token-alert"]),
324
  gr.update(value="ERROR: Token Required"),
325
  False
326
  )
327
- # Valid token
328
  return (
329
  gr.update(elem_classes=[]),
330
  gr.update(value="RUNNING...", elem_id="execute-btn-running", interactive=False),
331
  True
332
  )
333
 
334
- def _reset_run_state(task_name):
335
  return (
336
- _code_from_task_name(task_name),
337
  _terminal_html([("c-muted", "Boot sequence initialized...")]),
338
  "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Running...</div>"
339
  )
@@ -348,19 +346,17 @@ def run_agent(
348
  success_score_threshold: float,
349
  show_thought: bool,
350
  proceed: bool
351
- ) -> Generator[tuple[str, str, str, dict], None, None]:
352
 
353
  if not proceed:
354
- # User didn't pass auth check
355
- yield (gr.skip(), gr.skip(), gr.skip(), gr.update(value="Run Debugging Agent", interactive=True))
356
  return
357
 
358
- code_view = _code_from_task_name(task_name)
359
  terminal_lines: list[tuple[str, str]] = []
360
  terminal_lines.append(("c-muted", "Agent initialized... infiltrating target."))
361
 
362
  result_html = "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting end...</div>"
363
- yield code_view, _terminal_html(terminal_lines), result_html, gr.update()
364
 
365
  cmd = [sys.executable, str(INFERENCE_PATH)]
366
  if show_thought:
@@ -389,7 +385,9 @@ def run_agent(
389
 
390
  ended_streams: set[str] = set()
391
  thought_mode = False
392
- active_task_name = task_name
 
 
393
 
394
  while True:
395
  try:
@@ -431,10 +429,6 @@ def run_agent(
431
  end_match = END_RE.match(line)
432
 
433
  if start_match:
434
- active_task_name = start_match.group("task").strip()
435
- task_preview = _code_from_task_name(active_task_name)
436
- if "Waiting for selection" not in task_preview:
437
- code_view = task_preview
438
  terminal_lines.append(("c-start", line))
439
  elif step_match:
440
  err = step_match.group("error")
@@ -450,16 +444,17 @@ def run_agent(
450
  result_html = _large_metric_html(success, score_value, final_steps, rewards_raw or 'none')
451
 
452
  if success:
453
- solved = _solution_from_task_name(active_task_name)
 
454
  if solved:
455
- code_view = solved
456
  else:
457
  terminal_lines.append(("c-muted", line))
458
 
459
  if len(terminal_lines) > 500:
460
  terminal_lines = terminal_lines[-500:]
461
 
462
- yield code_view, _terminal_html(terminal_lines), result_html, gr.update()
463
 
464
  return_code = process.wait(timeout=2)
465
  if return_code != 0:
@@ -469,7 +464,13 @@ def run_agent(
469
  if len(terminal_lines) > 500:
470
  terminal_lines = terminal_lines[-500:]
471
 
472
- yield code_view, _terminal_html(terminal_lines), result_html, gr.update(value="Run Debugging Agent", elem_id="execute-btn", interactive=True)
 
 
 
 
 
 
473
 
474
 
475
  with gr.Blocks(title="TraceFix-RL") as demo:
@@ -497,7 +498,7 @@ with gr.Blocks(title="TraceFix-RL") as demo:
497
  with gr.Accordion("Engine Parameters", open=False):
498
  model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
499
  api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
500
- env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://{BACKEND_HOST}:{BACKEND_PORT}"))
501
  max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
502
  success_score_threshold = gr.Number(
503
  label="Success Score Threshold",
@@ -513,17 +514,11 @@ with gr.Blocks(title="TraceFix-RL") as demo:
513
  hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
514
 
515
  hud_badge = gr.HTML(_update_hud_badge("", ""))
516
- run_button = gr.Button("Run Debugging Agent", elem_id="execute-btn", variant="primary")
517
-
518
- # Sync tasks correctly
519
- easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
520
- medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
521
- hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
522
-
523
 
524
  with gr.Row(equal_height=True):
525
  with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
526
- gr.HTML("<div class='panel-title'>Sandbox Source Code</div>")
527
  code_view = gr.Code(
528
  language="python",
529
  interactive=False,
@@ -539,10 +534,9 @@ with gr.Blocks(title="TraceFix-RL") as demo:
539
  with gr.Row(elem_classes=["panel"]):
540
  result_block = gr.HTML("<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting Execution</div>")
541
 
542
- # Due to 'code_view' not being defined above when change was defined, we must re-bind the change events to include code_view.
543
- easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
544
- medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
545
- hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
546
 
547
  # Run Sequence
548
  run_state = gr.State(value=True)
@@ -556,8 +550,8 @@ with gr.Blocks(title="TraceFix-RL") as demo:
556
 
557
  reset_step = validate_step.then(
558
  _reset_run_state,
559
- inputs=[selected_task_state],
560
- outputs=[code_view, terminal, result_block],
561
  queue=False,
562
  )
563
 
@@ -574,5 +568,5 @@ with gr.Blocks(title="TraceFix-RL") as demo:
574
  show_thought,
575
  run_state
576
  ],
577
- outputs=[code_view, terminal, result_block, run_button],
578
  )
 
31
  ROOT_DIR = Path(__file__).resolve().parent
32
  INFERENCE_PATH = ROOT_DIR / "inference.py"
33
  BACKEND_HOST = "127.0.0.1"
34
+ BACKEND_PORT = 7860
35
  GRADIO_HOST = "0.0.0.0"
36
  GRADIO_PORT = 7860
37
 
 
287
 
288
 
289
  def sync_tasks(selected, grid_name):
 
290
  if grid_name == "easy":
291
  easy_val = selected
292
  med_val = None
 
305
 
306
  code_content = _code_from_task_name(selected)
307
  hud_content = _update_hud_badge(selected, diff)
308
+ title_content = "<div class='panel-title'>Target Source Code (Buggy)</div>"
309
 
310
  return (
311
  selected,
 
313
  gr.update(value=med_val),
314
  gr.update(value=hard_val),
315
  hud_content,
316
+ title_content,
317
  code_content
318
  )
319
 
320
  def validate_and_start(token):
321
  if not token or not token.strip():
 
322
  return (
323
  gr.update(elem_classes=["token-alert"]),
324
  gr.update(value="ERROR: Token Required"),
325
  False
326
  )
 
327
  return (
328
  gr.update(elem_classes=[]),
329
  gr.update(value="RUNNING...", elem_id="execute-btn-running", interactive=False),
330
  True
331
  )
332
 
333
+ def _reset_run_state():
334
  return (
 
335
  _terminal_html([("c-muted", "Boot sequence initialized...")]),
336
  "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Running...</div>"
337
  )
 
346
  success_score_threshold: float,
347
  show_thought: bool,
348
  proceed: bool
349
+ ) -> Generator[tuple[Any, str, str, dict, Any], None, None]:
350
 
351
  if not proceed:
352
+ yield (gr.skip(), gr.skip(), gr.skip(), gr.update(value="INITIATE TRACE RESOLUTION", interactive=True), gr.skip())
 
353
  return
354
 
 
355
  terminal_lines: list[tuple[str, str]] = []
356
  terminal_lines.append(("c-muted", "Agent initialized... infiltrating target."))
357
 
358
  result_html = "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting end...</div>"
359
+ yield gr.skip(), _terminal_html(terminal_lines), result_html, gr.update(), gr.skip()
360
 
361
  cmd = [sys.executable, str(INFERENCE_PATH)]
362
  if show_thought:
 
385
 
386
  ended_streams: set[str] = set()
387
  thought_mode = False
388
+
389
+ final_success = False
390
+ final_solved_code = None
391
 
392
  while True:
393
  try:
 
429
  end_match = END_RE.match(line)
430
 
431
  if start_match:
 
 
 
 
432
  terminal_lines.append(("c-start", line))
433
  elif step_match:
434
  err = step_match.group("error")
 
444
  result_html = _large_metric_html(success, score_value, final_steps, rewards_raw or 'none')
445
 
446
  if success:
447
+ final_success = True
448
+ solved = _solution_from_task_name(task_name)
449
  if solved:
450
+ final_solved_code = solved
451
  else:
452
  terminal_lines.append(("c-muted", line))
453
 
454
  if len(terminal_lines) > 500:
455
  terminal_lines = terminal_lines[-500:]
456
 
457
+ yield gr.skip(), _terminal_html(terminal_lines), result_html, gr.update(), gr.skip()
458
 
459
  return_code = process.wait(timeout=2)
460
  if return_code != 0:
 
464
  if len(terminal_lines) > 500:
465
  terminal_lines = terminal_lines[-500:]
466
 
467
+ code_update = gr.skip()
468
+ title_update = gr.skip()
469
+ if final_success and final_solved_code is not None:
470
+ code_update = final_solved_code
471
+ title_update = "<div class='panel-title'>Target Source Code (Resolved)</div>"
472
+
473
+ yield code_update, _terminal_html(terminal_lines), result_html, gr.update(value="INITIATE TRACE RESOLUTION", elem_id="execute-btn", interactive=True), title_update
474
 
475
 
476
  with gr.Blocks(title="TraceFix-RL") as demo:
 
498
  with gr.Accordion("Engine Parameters", open=False):
499
  model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
500
  api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
501
+ env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://127.0.0.1:{BACKEND_PORT}"))
502
  max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
503
  success_score_threshold = gr.Number(
504
  label="Success Score Threshold",
 
514
  hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
515
 
516
  hud_badge = gr.HTML(_update_hud_badge("", ""))
517
+ run_button = gr.Button("INITIATE TRACE RESOLUTION", elem_id="execute-btn", variant="primary")
 
 
 
 
 
 
518
 
519
  with gr.Row(equal_height=True):
520
  with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
521
+ code_panel_title = gr.HTML("<div class='panel-title'>Target Source Code (Buggy)</div>")
522
  code_view = gr.Code(
523
  language="python",
524
  interactive=False,
 
534
  with gr.Row(elem_classes=["panel"]):
535
  result_block = gr.HTML("<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting Execution</div>")
536
 
537
+ easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_panel_title, code_view])
538
+ medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_panel_title, code_view])
539
+ hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_panel_title, code_view])
 
540
 
541
  # Run Sequence
542
  run_state = gr.State(value=True)
 
550
 
551
  reset_step = validate_step.then(
552
  _reset_run_state,
553
+ inputs=[],
554
+ outputs=[terminal, result_block],
555
  queue=False,
556
  )
557
 
 
568
  show_thought,
569
  run_state
570
  ],
571
+ outputs=[code_view, terminal, result_block, run_button, code_panel_title],
572
  )