Spaces:
Sleeping
Sleeping
databoysu committed on
Commit ·
e5add8c
1
Parent(s): b45a9cb
UI reset
Browse files- vision_ui.py +30 -36
vision_ui.py
CHANGED
|
@@ -31,7 +31,7 @@ HARD_CHOICES = [t.get("name") for t in TASKS_BY_DIFFICULTY.get("hard", []) if t.
|
|
| 31 |
ROOT_DIR = Path(__file__).resolve().parent
|
| 32 |
INFERENCE_PATH = ROOT_DIR / "inference.py"
|
| 33 |
BACKEND_HOST = "127.0.0.1"
|
| 34 |
-
BACKEND_PORT =
|
| 35 |
GRADIO_HOST = "0.0.0.0"
|
| 36 |
GRADIO_PORT = 7860
|
| 37 |
|
|
@@ -287,7 +287,6 @@ def _build_env(
|
|
| 287 |
|
| 288 |
|
| 289 |
def sync_tasks(selected, grid_name):
|
| 290 |
-
# Depending on which grid was clicked, clear the others and fetch code
|
| 291 |
if grid_name == "easy":
|
| 292 |
easy_val = selected
|
| 293 |
med_val = None
|
|
@@ -306,6 +305,7 @@ def sync_tasks(selected, grid_name):
|
|
| 306 |
|
| 307 |
code_content = _code_from_task_name(selected)
|
| 308 |
hud_content = _update_hud_badge(selected, diff)
|
|
|
|
| 309 |
|
| 310 |
return (
|
| 311 |
selected,
|
|
@@ -313,27 +313,25 @@ def sync_tasks(selected, grid_name):
|
|
| 313 |
gr.update(value=med_val),
|
| 314 |
gr.update(value=hard_val),
|
| 315 |
hud_content,
|
|
|
|
| 316 |
code_content
|
| 317 |
)
|
| 318 |
|
| 319 |
def validate_and_start(token):
|
| 320 |
if not token or not token.strip():
|
| 321 |
-
# Empty token: halt execution, render alert, push an error state early
|
| 322 |
return (
|
| 323 |
gr.update(elem_classes=["token-alert"]),
|
| 324 |
gr.update(value="ERROR: Token Required"),
|
| 325 |
False
|
| 326 |
)
|
| 327 |
-
# Valid token
|
| 328 |
return (
|
| 329 |
gr.update(elem_classes=[]),
|
| 330 |
gr.update(value="RUNNING...", elem_id="execute-btn-running", interactive=False),
|
| 331 |
True
|
| 332 |
)
|
| 333 |
|
| 334 |
-
def _reset_run_state(
|
| 335 |
return (
|
| 336 |
-
_code_from_task_name(task_name),
|
| 337 |
_terminal_html([("c-muted", "Boot sequence initialized...")]),
|
| 338 |
"<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Running...</div>"
|
| 339 |
)
|
|
@@ -348,19 +346,17 @@ def run_agent(
|
|
| 348 |
success_score_threshold: float,
|
| 349 |
show_thought: bool,
|
| 350 |
proceed: bool
|
| 351 |
-
) -> Generator[tuple[
|
| 352 |
|
| 353 |
if not proceed:
|
| 354 |
-
|
| 355 |
-
yield (gr.skip(), gr.skip(), gr.skip(), gr.update(value="Run Debugging Agent", interactive=True))
|
| 356 |
return
|
| 357 |
|
| 358 |
-
code_view = _code_from_task_name(task_name)
|
| 359 |
terminal_lines: list[tuple[str, str]] = []
|
| 360 |
terminal_lines.append(("c-muted", "Agent initialized... infiltrating target."))
|
| 361 |
|
| 362 |
result_html = "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting end...</div>"
|
| 363 |
-
yield
|
| 364 |
|
| 365 |
cmd = [sys.executable, str(INFERENCE_PATH)]
|
| 366 |
if show_thought:
|
|
@@ -389,7 +385,9 @@ def run_agent(
|
|
| 389 |
|
| 390 |
ended_streams: set[str] = set()
|
| 391 |
thought_mode = False
|
| 392 |
-
|
|
|
|
|
|
|
| 393 |
|
| 394 |
while True:
|
| 395 |
try:
|
|
@@ -431,10 +429,6 @@ def run_agent(
|
|
| 431 |
end_match = END_RE.match(line)
|
| 432 |
|
| 433 |
if start_match:
|
| 434 |
-
active_task_name = start_match.group("task").strip()
|
| 435 |
-
task_preview = _code_from_task_name(active_task_name)
|
| 436 |
-
if "Waiting for selection" not in task_preview:
|
| 437 |
-
code_view = task_preview
|
| 438 |
terminal_lines.append(("c-start", line))
|
| 439 |
elif step_match:
|
| 440 |
err = step_match.group("error")
|
|
@@ -450,16 +444,17 @@ def run_agent(
|
|
| 450 |
result_html = _large_metric_html(success, score_value, final_steps, rewards_raw or 'none')
|
| 451 |
|
| 452 |
if success:
|
| 453 |
-
|
|
|
|
| 454 |
if solved:
|
| 455 |
-
|
| 456 |
else:
|
| 457 |
terminal_lines.append(("c-muted", line))
|
| 458 |
|
| 459 |
if len(terminal_lines) > 500:
|
| 460 |
terminal_lines = terminal_lines[-500:]
|
| 461 |
|
| 462 |
-
yield
|
| 463 |
|
| 464 |
return_code = process.wait(timeout=2)
|
| 465 |
if return_code != 0:
|
|
@@ -469,7 +464,13 @@ def run_agent(
|
|
| 469 |
if len(terminal_lines) > 500:
|
| 470 |
terminal_lines = terminal_lines[-500:]
|
| 471 |
|
| 472 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
|
| 474 |
|
| 475 |
with gr.Blocks(title="TraceFix-RL") as demo:
|
|
@@ -497,7 +498,7 @@ with gr.Blocks(title="TraceFix-RL") as demo:
|
|
| 497 |
with gr.Accordion("Engine Parameters", open=False):
|
| 498 |
model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
|
| 499 |
api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
|
| 500 |
-
env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://
|
| 501 |
max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
|
| 502 |
success_score_threshold = gr.Number(
|
| 503 |
label="Success Score Threshold",
|
|
@@ -513,17 +514,11 @@ with gr.Blocks(title="TraceFix-RL") as demo:
|
|
| 513 |
hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
|
| 514 |
|
| 515 |
hud_badge = gr.HTML(_update_hud_badge("", ""))
|
| 516 |
-
run_button = gr.Button("
|
| 517 |
-
|
| 518 |
-
# Sync tasks correctly
|
| 519 |
-
easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
|
| 520 |
-
medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
|
| 521 |
-
hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
|
| 522 |
-
|
| 523 |
|
| 524 |
with gr.Row(equal_height=True):
|
| 525 |
with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
|
| 526 |
-
gr.HTML("<div class='panel-title'>
|
| 527 |
code_view = gr.Code(
|
| 528 |
language="python",
|
| 529 |
interactive=False,
|
|
@@ -539,10 +534,9 @@ with gr.Blocks(title="TraceFix-RL") as demo:
|
|
| 539 |
with gr.Row(elem_classes=["panel"]):
|
| 540 |
result_block = gr.HTML("<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting Execution</div>")
|
| 541 |
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
|
| 546 |
|
| 547 |
# Run Sequence
|
| 548 |
run_state = gr.State(value=True)
|
|
@@ -556,8 +550,8 @@ with gr.Blocks(title="TraceFix-RL") as demo:
|
|
| 556 |
|
| 557 |
reset_step = validate_step.then(
|
| 558 |
_reset_run_state,
|
| 559 |
-
inputs=[
|
| 560 |
-
outputs=[
|
| 561 |
queue=False,
|
| 562 |
)
|
| 563 |
|
|
@@ -574,5 +568,5 @@ with gr.Blocks(title="TraceFix-RL") as demo:
|
|
| 574 |
show_thought,
|
| 575 |
run_state
|
| 576 |
],
|
| 577 |
-
outputs=[code_view, terminal, result_block, run_button],
|
| 578 |
)
|
|
|
|
| 31 |
ROOT_DIR = Path(__file__).resolve().parent
|
| 32 |
INFERENCE_PATH = ROOT_DIR / "inference.py"
|
| 33 |
BACKEND_HOST = "127.0.0.1"
|
| 34 |
+
BACKEND_PORT = 7860
|
| 35 |
GRADIO_HOST = "0.0.0.0"
|
| 36 |
GRADIO_PORT = 7860
|
| 37 |
|
|
|
|
| 287 |
|
| 288 |
|
| 289 |
def sync_tasks(selected, grid_name):
|
|
|
|
| 290 |
if grid_name == "easy":
|
| 291 |
easy_val = selected
|
| 292 |
med_val = None
|
|
|
|
| 305 |
|
| 306 |
code_content = _code_from_task_name(selected)
|
| 307 |
hud_content = _update_hud_badge(selected, diff)
|
| 308 |
+
title_content = "<div class='panel-title'>Target Source Code (Buggy)</div>"
|
| 309 |
|
| 310 |
return (
|
| 311 |
selected,
|
|
|
|
| 313 |
gr.update(value=med_val),
|
| 314 |
gr.update(value=hard_val),
|
| 315 |
hud_content,
|
| 316 |
+
title_content,
|
| 317 |
code_content
|
| 318 |
)
|
| 319 |
|
| 320 |
def validate_and_start(token):
|
| 321 |
if not token or not token.strip():
|
|
|
|
| 322 |
return (
|
| 323 |
gr.update(elem_classes=["token-alert"]),
|
| 324 |
gr.update(value="ERROR: Token Required"),
|
| 325 |
False
|
| 326 |
)
|
|
|
|
| 327 |
return (
|
| 328 |
gr.update(elem_classes=[]),
|
| 329 |
gr.update(value="RUNNING...", elem_id="execute-btn-running", interactive=False),
|
| 330 |
True
|
| 331 |
)
|
| 332 |
|
| 333 |
+
def _reset_run_state():
|
| 334 |
return (
|
|
|
|
| 335 |
_terminal_html([("c-muted", "Boot sequence initialized...")]),
|
| 336 |
"<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Running...</div>"
|
| 337 |
)
|
|
|
|
| 346 |
success_score_threshold: float,
|
| 347 |
show_thought: bool,
|
| 348 |
proceed: bool
|
| 349 |
+
) -> Generator[tuple[Any, str, str, dict, Any], None, None]:
|
| 350 |
|
| 351 |
if not proceed:
|
| 352 |
+
yield (gr.skip(), gr.skip(), gr.skip(), gr.update(value="INITIATE TRACE RESOLUTION", interactive=True), gr.skip())
|
|
|
|
| 353 |
return
|
| 354 |
|
|
|
|
| 355 |
terminal_lines: list[tuple[str, str]] = []
|
| 356 |
terminal_lines.append(("c-muted", "Agent initialized... infiltrating target."))
|
| 357 |
|
| 358 |
result_html = "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting end...</div>"
|
| 359 |
+
yield gr.skip(), _terminal_html(terminal_lines), result_html, gr.update(), gr.skip()
|
| 360 |
|
| 361 |
cmd = [sys.executable, str(INFERENCE_PATH)]
|
| 362 |
if show_thought:
|
|
|
|
| 385 |
|
| 386 |
ended_streams: set[str] = set()
|
| 387 |
thought_mode = False
|
| 388 |
+
|
| 389 |
+
final_success = False
|
| 390 |
+
final_solved_code = None
|
| 391 |
|
| 392 |
while True:
|
| 393 |
try:
|
|
|
|
| 429 |
end_match = END_RE.match(line)
|
| 430 |
|
| 431 |
if start_match:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
terminal_lines.append(("c-start", line))
|
| 433 |
elif step_match:
|
| 434 |
err = step_match.group("error")
|
|
|
|
| 444 |
result_html = _large_metric_html(success, score_value, final_steps, rewards_raw or 'none')
|
| 445 |
|
| 446 |
if success:
|
| 447 |
+
final_success = True
|
| 448 |
+
solved = _solution_from_task_name(task_name)
|
| 449 |
if solved:
|
| 450 |
+
final_solved_code = solved
|
| 451 |
else:
|
| 452 |
terminal_lines.append(("c-muted", line))
|
| 453 |
|
| 454 |
if len(terminal_lines) > 500:
|
| 455 |
terminal_lines = terminal_lines[-500:]
|
| 456 |
|
| 457 |
+
yield gr.skip(), _terminal_html(terminal_lines), result_html, gr.update(), gr.skip()
|
| 458 |
|
| 459 |
return_code = process.wait(timeout=2)
|
| 460 |
if return_code != 0:
|
|
|
|
| 464 |
if len(terminal_lines) > 500:
|
| 465 |
terminal_lines = terminal_lines[-500:]
|
| 466 |
|
| 467 |
+
code_update = gr.skip()
|
| 468 |
+
title_update = gr.skip()
|
| 469 |
+
if final_success and final_solved_code is not None:
|
| 470 |
+
code_update = final_solved_code
|
| 471 |
+
title_update = "<div class='panel-title'>Target Source Code (Resolved)</div>"
|
| 472 |
+
|
| 473 |
+
yield code_update, _terminal_html(terminal_lines), result_html, gr.update(value="INITIATE TRACE RESOLUTION", elem_id="execute-btn", interactive=True), title_update
|
| 474 |
|
| 475 |
|
| 476 |
with gr.Blocks(title="TraceFix-RL") as demo:
|
|
|
|
| 498 |
with gr.Accordion("Engine Parameters", open=False):
|
| 499 |
model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
|
| 500 |
api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
|
| 501 |
+
env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://127.0.0.1:{BACKEND_PORT}"))
|
| 502 |
max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
|
| 503 |
success_score_threshold = gr.Number(
|
| 504 |
label="Success Score Threshold",
|
|
|
|
| 514 |
hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
|
| 515 |
|
| 516 |
hud_badge = gr.HTML(_update_hud_badge("", ""))
|
| 517 |
+
run_button = gr.Button("INITIATE TRACE RESOLUTION", elem_id="execute-btn", variant="primary")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
|
| 519 |
with gr.Row(equal_height=True):
|
| 520 |
with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
|
| 521 |
+
code_panel_title = gr.HTML("<div class='panel-title'>Target Source Code (Buggy)</div>")
|
| 522 |
code_view = gr.Code(
|
| 523 |
language="python",
|
| 524 |
interactive=False,
|
|
|
|
| 534 |
with gr.Row(elem_classes=["panel"]):
|
| 535 |
result_block = gr.HTML("<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting Execution</div>")
|
| 536 |
|
| 537 |
+
easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_panel_title, code_view])
|
| 538 |
+
medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_panel_title, code_view])
|
| 539 |
+
hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_panel_title, code_view])
|
|
|
|
| 540 |
|
| 541 |
# Run Sequence
|
| 542 |
run_state = gr.State(value=True)
|
|
|
|
| 550 |
|
| 551 |
reset_step = validate_step.then(
|
| 552 |
_reset_run_state,
|
| 553 |
+
inputs=[],
|
| 554 |
+
outputs=[terminal, result_block],
|
| 555 |
queue=False,
|
| 556 |
)
|
| 557 |
|
|
|
|
| 568 |
show_thought,
|
| 569 |
run_state
|
| 570 |
],
|
| 571 |
+
outputs=[code_view, terminal, result_block, run_button, code_panel_title],
|
| 572 |
)
|