databoysu committed on
Commit
b45a9cb
·
1 Parent(s): 547b9d6

change defaults for hf spaces

Browse files
Files changed (2) hide show
  1. inference.py +1 -1
  2. vision_ui.py +174 -220
inference.py CHANGED
@@ -42,7 +42,7 @@ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b")
42
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or "lm-studio"
43
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
44
 
45
- ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://127.0.0.1:8000")
46
  TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
47
  BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
48
  MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
 
42
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or "lm-studio"
43
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
44
 
45
+ ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://127.0.0.1:7860")
46
  TASK_NAME = os.getenv("TASK_NAME", "tracefix_rl")
47
  BENCHMARK = os.getenv("BENCHMARK", "tracefix_rl")
48
  MAX_STEPS = int(os.getenv("MAX_STEPS", "50"))
vision_ui.py CHANGED
@@ -68,30 +68,31 @@ CSS = """
68
  font-family: 'Inter', sans-serif !important;
69
  background: var(--bg-top);
70
  color: var(--text-main);
 
71
  }
72
 
73
  #header-wrap {
74
- margin-bottom: 5px;
75
- border: 1px solid var(--accent);
76
  background: #000;
77
  border-radius: 0px;
78
- padding: 16px 20px;
79
  text-transform: uppercase;
80
  }
81
 
82
  #header-wrap h1 {
83
  margin: 0;
84
- letter-spacing: 2px;
85
- font-weight: 900;
86
  color: #fff;
87
- font-style: italic;
88
- text-shadow: 2px 2px #E60012;
89
  }
90
 
91
  #header-wrap p {
92
- margin: 6px 0 0;
93
- color: #fff;
94
  font-weight: 500;
 
95
  }
96
 
97
  .panel {
@@ -99,34 +100,45 @@ CSS = """
99
  border-radius: 0px !important;
100
  background: var(--panel) !important;
101
  overflow: hidden;
 
102
  }
103
 
104
  .panel-title {
105
- padding: 10px 14px;
106
  border-bottom: 1px solid var(--panel-border);
107
  color: var(--text-dim);
108
- font-size: 14px;
109
- letter-spacing: 0.1em;
110
  text-transform: uppercase;
111
  font-weight: bold;
112
  }
113
 
114
  #execute-btn {
115
- background: var(--accent) !important;
116
  color: #fff !important;
117
  border-radius: 0px !important;
118
- font-weight: 900 !important;
119
- font-size: 18px !important;
120
  text-transform: uppercase !important;
121
- border: none !important;
122
  transition: all 0.2s ease !important;
123
- height: 60px !important;
124
  }
125
 
126
  #execute-btn:hover {
127
- background: #fff !important;
128
- color: var(--accent) !important;
129
- box-shadow: 0 0 15px var(--accent) !important;
 
 
 
 
 
 
 
 
 
 
130
  }
131
 
132
  .code-panel * {
@@ -134,14 +146,14 @@ CSS = """
134
  }
135
 
136
  .terminal-wrap {
137
- height: 600px;
138
  overflow-y: auto;
139
- padding: 12px;
140
  font-family: 'JetBrains Mono', monospace;
141
  font-size: 13px;
142
- line-height: 1.6;
143
  background: #050505;
144
- border: 2px solid var(--accent);
145
  }
146
 
147
  .term-line {
@@ -149,20 +161,26 @@ CSS = """
149
  word-break: break-word;
150
  }
151
 
152
- /* Cyberpunk Log Colors */
153
- .c-start { color: #E60012; font-weight: bold; }
154
- .c-end { color: #E60012; font-weight: bold; }
155
  .c-step { color: #39ff14; font-weight: bold; }
156
  .c-thought { color: #5b7a96; font-style: italic; }
157
- .c-error { color: #E60012; }
158
  .c-muted { color: var(--text-dim); }
159
 
160
  .metric {
161
- border: 1px solid var(--panel-border);
162
  background: #000;
163
- border-radius: 0px;
164
- padding: 12px;
165
- border-left: 4px solid var(--accent);
 
 
 
 
 
 
 
166
  }
167
  """
168
 
@@ -170,9 +188,8 @@ def _code_from_task_name(task_name: str) -> str:
170
  task = TASK_MAP.get((task_name or "").strip())
171
  if not task:
172
  return (
173
- "# Waiting for mission start...\n"
174
- "# Tip: Select a target from the Mission Board\n"
175
- "# so the buggy sandbox code can be previewed before launch."
176
  )
177
  return "\n".join(task.get("code", []))
178
 
@@ -184,57 +201,13 @@ def _normalize_base_url(base_url: str) -> str:
184
  candidate = f"http://{candidate}"
185
  return candidate.rstrip("/")
186
 
187
- def _code_from_openenv(task_name: str, env_base_url: str) -> str | None:
188
- normalized_url = _normalize_base_url(env_base_url)
189
- task_key = (task_name or "").strip()
190
- if not task_key:
191
- return None
192
-
193
- candidates = [
194
- f"{normalized_url}/tasks/{task_key}/code",
195
- f"{normalized_url}/task/{task_key}/code",
196
- f"{normalized_url}/tasks/{task_key}",
197
- f"{normalized_url}/task/{task_key}",
198
- ]
199
-
200
- for url in candidates:
201
- try:
202
- req = urllib.request.Request(url, method="GET")
203
- with urllib.request.urlopen(req, timeout=3) as response:
204
- if response.status != 200:
205
- continue
206
- payload = json.loads(response.read().decode("utf-8"))
207
- except (urllib.error.URLError, urllib.error.HTTPError, TimeoutError, ValueError):
208
- continue
209
-
210
- if isinstance(payload, dict):
211
- code = payload.get("code")
212
- if isinstance(code, list):
213
- return "\n".join(str(line) for line in code)
214
- if isinstance(code, str):
215
- return code
216
-
217
- task_data = payload.get("task")
218
- if isinstance(task_data, dict):
219
- task_code = task_data.get("code")
220
- if isinstance(task_code, list):
221
- return "\n".join(str(line) for line in task_code)
222
- if isinstance(task_code, str):
223
- return task_code
224
- return None
225
-
226
  def load_code(task_name: str, env_base_url: str) -> str:
227
  local_code = _code_from_task_name(task_name)
228
- if "Waiting for mission start" not in local_code:
229
  return local_code
230
-
231
- api_code = _code_from_openenv(task_name, env_base_url)
232
- if api_code:
233
- return api_code
234
-
235
  return (
236
  "# Unable to load code for the selected task.\n"
237
- "# Verify Task / Bug Selection and confirm OpenEnv API is reachable."
238
  )
239
 
240
  def _solution_from_task_name(task_name: str) -> str | None:
@@ -248,7 +221,7 @@ def _terminal_html(lines: list[tuple[str, str]]) -> str:
248
  for css_class, text in lines:
249
  safe = html.escape(text)
250
  rendered.append(f"<div class='term-line {css_class}'>{safe}</div>")
251
- content = "\n".join(rendered) if rendered else "<div class='term-line c-muted'>Idle. Configure mission variables and press EXECUTE TRACEFIX.</div>"
252
  return (
253
  "<div id='terminal' class='terminal-wrap'>"
254
  f"{content}"
@@ -258,20 +231,20 @@ def _terminal_html(lines: list[tuple[str, str]]) -> str:
258
  "</script>"
259
  )
260
 
261
- def _metric_block(state: str, details: str) -> str:
262
- return (
263
- "<div class='metric'>"
264
- f"<div><strong>{html.escape(state)}</strong></div>"
265
- f"<div style='color:var(--text-dim); margin-top: 6px'>{html.escape(details)}</div>"
266
- "</div>"
267
- )
268
-
269
  def _update_hud_badge(task_name: str, difficulty: str) -> str:
270
  if not task_name:
271
- return "<div style='padding: 10px; color: var(--text-dim); border: 1px dashed var(--panel-border); text-align: center;'>WAITING FOR TARGET SELECTION...</div>"
272
- color = "#39ff14" if difficulty == "Easy" else ("#f9d78b" if difficulty == "Medium" else "#E60012")
273
- return f"""<div style='border: 2px solid {color}; padding: 12px; background: rgba(0,0,0,0.5); color: {color}; font-weight: 900; font-size: 16px; text-transform: uppercase; text-align: center; letter-spacing: 1.5px;'>
274
- >> TARGET ACQUIRED: {html.escape(task_name)} | THREAT LEVEL: {difficulty} <<
 
 
 
 
 
 
 
 
275
  </div>"""
276
 
277
  def _reader_thread(stream: Any, source: str, out_q: queue.Queue[tuple[str, str | None]]) -> None:
@@ -291,10 +264,8 @@ def _build_env(
291
  model_name: str,
292
  env_base_url: str,
293
  task_name: str,
294
- benchmark: str,
295
  max_steps: int,
296
  success_score_threshold: float,
297
- local_image_name: str,
298
  ) -> dict[str, str]:
299
  env = os.environ.copy()
300
  updates = {
@@ -303,10 +274,8 @@ def _build_env(
303
  "MODEL_NAME": model_name,
304
  "ENV_BASE_URL": _normalize_base_url(env_base_url),
305
  "TASK_NAME": task_name,
306
- "BENCHMARK": benchmark,
307
  "MAX_STEPS": str(int(max_steps)),
308
  "SUCCESS_SCORE_THRESHOLD": str(float(success_score_threshold)),
309
- "LOCAL_IMAGE_NAME": local_image_name,
310
  }
311
  for key, value in updates.items():
312
  cleaned = (value or "").strip()
@@ -316,61 +285,90 @@ def _build_env(
316
  env.pop(key, None)
317
  return env
318
 
319
- def get_active_task(easy, medium, hard):
320
- return (easy or medium or hard or "").strip()
321
 
322
- def _reset_run_state(easy, medium, hard):
323
- task_name = get_active_task(easy, medium, hard)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  return (
325
  _code_from_task_name(task_name),
326
- _terminal_html([]),
327
- _metric_block("Mission Ready", "Awaiting [START] from inference subprocess..."),
328
- 0.0,
329
- "`Rewards:` pending"
330
  )
331
 
332
  def run_agent(
333
- easy_radio: str,
334
- medium_radio: str,
335
- hard_radio: str,
336
  hf_token: str,
337
  api_base_url: str,
338
  model_name: str,
339
  env_base_url: str,
340
- benchmark: str,
341
  max_steps: int,
342
  success_score_threshold: float,
343
- local_image_name: str,
344
- difficulty: str,
345
  show_thought: bool,
346
- ) -> Generator[tuple[str, str, str, float, str, dict], None, None]:
 
347
 
348
- task_name = get_active_task(easy_radio, medium_radio, hard_radio)
 
 
 
 
349
  code_view = _code_from_task_name(task_name)
350
  terminal_lines: list[tuple[str, str]] = []
351
- terminal_lines.append(("c-muted", "Boot sequence initialized... infiltrating target."))
352
 
353
- status_html = _metric_block("Mission Infiltration", "Launching inference subprocess...")
354
- score_value = 0.0
355
- rewards_md = "`Rewards:` pending"
356
- yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md, gr.update(value="INFILTRATING...", interactive=False)
357
 
358
  cmd = [sys.executable, str(INFERENCE_PATH)]
359
- if difficulty in {"easy", "medium", "hard"}:
360
- cmd.append(f"--{difficulty}")
361
  if show_thought:
362
  cmd.append("--thought")
363
 
364
  env = _build_env(
365
- hf_token,
366
- api_base_url,
367
- model_name,
368
- env_base_url,
369
- task_name,
370
- benchmark,
371
- max_steps,
372
- success_score_threshold,
373
- local_image_name,
374
  )
375
 
376
  process = subprocess.Popen(
@@ -392,7 +390,6 @@ def run_agent(
392
  ended_streams: set[str] = set()
393
  thought_mode = False
394
  active_task_name = task_name
395
- final_steps = 0
396
 
397
  while True:
398
  try:
@@ -421,14 +418,13 @@ def run_agent(
421
  terminal_lines.append(("c-thought", line))
422
  else:
423
  if not show_thought:
424
- # Strict gatekeeper rules over stderr leakage too
425
  if not any(tag in line for tag in ["[START]", "[STEP]", "[END]"]):
426
  continue
427
  terminal_lines.append(("c-error", line))
428
  else:
429
  if not show_thought:
430
  if not any(tag in line for tag in ["[START]", "[STEP]", "[END]"]):
431
- continue # Strict Gatekeeper skipping log
432
 
433
  start_match = START_RE.match(line)
434
  step_match = STEP_RE.match(line)
@@ -437,77 +433,57 @@ def run_agent(
437
  if start_match:
438
  active_task_name = start_match.group("task").strip()
439
  task_preview = _code_from_task_name(active_task_name)
440
- if "Waiting for mission start" not in task_preview:
441
  code_view = task_preview
442
  terminal_lines.append(("c-start", line))
443
- status_html = _metric_block(
444
- "Mission Running",
445
- f"task={active_task_name} | env={start_match.group('env')} | model={start_match.group('model')}",
446
- )
447
  elif step_match:
448
- final_steps = int(step_match.group("step"))
449
- action = step_match.group("action")
450
- reward = float(step_match.group("reward"))
451
- done_flag = step_match.group("done") == "true"
452
  err = step_match.group("error")
453
  css = "c-step" if err == "null" else "c-error"
454
  terminal_lines.append((css, line))
455
- status_html = _metric_block(
456
- "Mission Running",
457
- f"step={final_steps} action={action} reward={reward:.2f} done={str(done_flag).lower()}",
458
- )
459
  elif end_match:
460
  success = end_match.group("success") == "true"
461
  final_steps = int(end_match.group("steps"))
462
  score_value = float(end_match.group("score"))
463
  rewards_raw = end_match.group("rewards").strip()
464
- rewards_md = f"`Rewards:` {rewards_raw or 'none'}"
465
  terminal_lines.append(("c-end", line))
 
 
 
466
  if success:
467
  solved = _solution_from_task_name(active_task_name)
468
  if solved:
469
  code_view = solved
470
- status_html = _metric_block(
471
- "Mission Success",
472
- f"score={score_value:.2f} | steps={final_steps}",
473
- )
474
- else:
475
- status_html = _metric_block(
476
- "Mission Failed",
477
- f"score={score_value:.2f} | steps={final_steps}",
478
- )
479
  else:
480
  terminal_lines.append(("c-muted", line))
481
 
482
  if len(terminal_lines) > 500:
483
  terminal_lines = terminal_lines[-500:]
484
 
485
- yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md, gr.update(value="INFILTRATING...", interactive=False)
486
 
487
  return_code = process.wait(timeout=2)
488
  if return_code != 0:
489
  terminal_lines.append(("c-error", f"Process exited with code {return_code}."))
490
- status_html = _metric_block(
491
- "Mission Error",
492
- f"inference.py exited non-zero (code={return_code})",
493
- )
494
 
495
  if len(terminal_lines) > 500:
496
  terminal_lines = terminal_lines[-500:]
497
 
498
- yield code_view, _terminal_html(terminal_lines), status_html, score_value, rewards_md, gr.update(value="EXECUTE TRACEFIX", interactive=True)
499
 
500
 
501
- with gr.Blocks(title="TraceFix-RL Mission Control") as demo:
502
  gr.HTML(
503
  f"""
504
  <style>{CSS}</style>
505
  <div id='header-wrap'>
506
- <h1>TraceFix-RL /// PHANTOM PROTOCOL</h1>
507
- <p>Real-time autonomous agent infiltration orchestration.</p>
508
  </div>
509
  """
510
  )
 
 
511
 
512
  if hasattr(gr, "Sidebar"):
513
  sidebar_context = gr.Sidebar()
@@ -515,110 +491,88 @@ with gr.Blocks(title="TraceFix-RL Mission Control") as demo:
515
  sidebar_context = gr.Column()
516
 
517
  with sidebar_context:
518
- # Zone 1: The Config Sidebar
519
- gr.Markdown("### CORE AUTHENTICATION")
520
- hf_token = gr.Textbox(label="HF Token", type="password", placeholder="hf_xxx")
521
 
522
- with gr.Accordion("Advanced Engine Parameters", open=False):
523
  model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
524
  api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
525
  env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://{BACKEND_HOST}:{BACKEND_PORT}"))
526
- benchmark = gr.Textbox(label="Benchmark", value=os.getenv("BENCHMARK", "tracefix_rl"))
527
- local_image_name = gr.Textbox(label="Local Image Name", value=os.getenv("LOCAL_IMAGE_NAME", ""), placeholder="optional")
528
  max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
529
  success_score_threshold = gr.Number(
530
  label="Success Score Threshold",
531
  value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99")),
532
  precision=2,
533
  )
534
- difficulty = gr.Dropdown(label="Difficulty", choices=["auto", "easy", "medium", "hard"], value="auto")
535
- show_thought = gr.Checkbox(label="Stream Thought Trace", value=True)
536
 
537
- # Zone 2: The Mission Board
538
- gr.HTML("<div class='panel-title' style='margin-top: 10px;'>MISSION BOARD /// TARGET SELECTION</div>")
539
  with gr.Row(elem_classes=["panel"]):
540
  easy_radio = gr.Radio(choices=EASY_CHOICES, label="Easy Targets", elem_id="easy-radio")
541
  medium_radio = gr.Radio(choices=MEDIUM_CHOICES, label="Medium Targets", elem_id="medium-radio")
542
  hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
543
 
544
- # Zone 3: The HUD
545
  hud_badge = gr.HTML(_update_hud_badge("", ""))
546
- run_button = gr.Button("EXECUTE TRACEFIX", elem_id="execute-btn", variant="primary")
547
 
548
- # Radio change handlers for mutual exclusivity logic & HUD updates
549
- def select_easy(val):
550
- if not val:
551
- return gr.skip(), gr.skip(), gr.skip(), gr.skip()
552
- return None, None, _update_hud_badge(val, "Easy"), _code_from_task_name(val)
553
 
554
- def select_medium(val):
555
- if not val:
556
- return gr.skip(), gr.skip(), gr.skip(), gr.skip()
557
- return None, None, _update_hud_badge(val, "Medium"), _code_from_task_name(val)
558
 
559
- def select_hard(val):
560
- if not val:
561
- return gr.skip(), gr.skip(), gr.skip(), gr.skip()
562
- return None, None, _update_hud_badge(val, "Hard"), _code_from_task_name(val)
563
-
564
- # Zone 4: The Arena
565
  with gr.Row(equal_height=True):
566
  with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
567
- gr.HTML("<div class='panel-title'>SANDBOX CODE</div>")
568
  code_view = gr.Code(
569
  language="python",
570
  interactive=False,
571
  value=_code_from_task_name(""),
572
- lines=30,
573
  )
 
 
574
 
575
  with gr.Column(scale=1, elem_classes=["panel"]):
576
- gr.HTML("<div class='panel-title'>TERMINAL TRACE</div>")
577
  terminal = gr.HTML(_terminal_html([]))
578
 
579
- with gr.Row():
580
- metric = gr.HTML(_metric_block("Idle", "Awaiting target selection."))
581
- score = gr.Number(label="Final Score", value=0.0, precision=3)
582
- rewards = gr.Markdown("`Rewards:` pending")
583
 
584
- easy_radio.change(select_easy, inputs=[easy_radio], outputs=[medium_radio, hard_radio, hud_badge, code_view])
585
- medium_radio.change(select_medium, inputs=[medium_radio], outputs=[easy_radio, hard_radio, hud_badge, code_view])
586
- hard_radio.change(select_hard, inputs=[hard_radio], outputs=[easy_radio, medium_radio, hud_badge, code_view])
 
587
 
588
  # Run Sequence
589
- # First disable button to show immediate feedback
590
- run_immediate = run_button.click(
591
- lambda: gr.update(value="INFILTRATING...", interactive=False),
592
- inputs=[],
593
- outputs=[run_button],
 
594
  queue=False
595
  )
596
 
597
- # Then reset state
598
- run_event = run_immediate.then(
599
  _reset_run_state,
600
- inputs=[easy_radio, medium_radio, hard_radio],
601
- outputs=[code_view, terminal, metric, score, rewards],
602
  queue=False,
603
  )
604
 
605
- # Finally run generator (loads environment, streams stdout, then re-enables button upon END)
606
- run_event.then(
607
  run_agent,
608
  inputs=[
609
- easy_radio,
610
- medium_radio,
611
- hard_radio,
612
  hf_token,
613
  api_base_url,
614
  model_name,
615
  env_base_url,
616
- benchmark,
617
  max_steps,
618
  success_score_threshold,
619
- local_image_name,
620
- difficulty,
621
  show_thought,
 
622
  ],
623
- outputs=[code_view, terminal, metric, score, rewards, run_button],
624
  )
 
68
  font-family: 'Inter', sans-serif !important;
69
  background: var(--bg-top);
70
  color: var(--text-main);
71
+ padding: 0px !important;
72
  }
73
 
74
  #header-wrap {
75
+ margin-bottom: 2px;
76
+ border: 1px solid var(--panel-border);
77
  background: #000;
78
  border-radius: 0px;
79
+ padding: 8px 12px;
80
  text-transform: uppercase;
81
  }
82
 
83
  #header-wrap h1 {
84
  margin: 0;
85
+ letter-spacing: 1px;
86
+ font-weight: 700;
87
  color: #fff;
88
+ font-size: 20px;
 
89
  }
90
 
91
  #header-wrap p {
92
+ margin: 2px 0 0;
93
+ color: var(--text-dim);
94
  font-weight: 500;
95
+ font-size: 13px;
96
  }
97
 
98
  .panel {
 
100
  border-radius: 0px !important;
101
  background: var(--panel) !important;
102
  overflow: hidden;
103
+ padding: 0px !important;
104
  }
105
 
106
  .panel-title {
107
+ padding: 6px 10px;
108
  border-bottom: 1px solid var(--panel-border);
109
  color: var(--text-dim);
110
+ font-size: 12px;
111
+ letter-spacing: 0.05em;
112
  text-transform: uppercase;
113
  font-weight: bold;
114
  }
115
 
116
  #execute-btn {
117
+ background: #2b2b2b !important;
118
  color: #fff !important;
119
  border-radius: 0px !important;
120
+ font-weight: 700 !important;
121
+ font-size: 16px !important;
122
  text-transform: uppercase !important;
123
+ border: 2px solid #fff !important;
124
  transition: all 0.2s ease !important;
125
+ height: 40px !important;
126
  }
127
 
128
  #execute-btn:hover {
129
+ background: #801a1a !important;
130
+ border-color: #ff4a4a !important;
131
+ }
132
+
133
+ #execute-btn-running {
134
+ background: #801a1a !important;
135
+ color: #fff !important;
136
+ border-radius: 0px !important;
137
+ font-weight: 700 !important;
138
+ font-size: 16px !important;
139
+ text-transform: uppercase !important;
140
+ border: 2px solid #ff4a4a !important;
141
+ height: 40px !important;
142
  }
143
 
144
  .code-panel * {
 
146
  }
147
 
148
  .terminal-wrap {
149
+ height: 45vh;
150
  overflow-y: auto;
151
+ padding: 8px;
152
  font-family: 'JetBrains Mono', monospace;
153
  font-size: 13px;
154
+ line-height: 1.5;
155
  background: #050505;
156
+ border: 1px solid var(--panel-border);
157
  }
158
 
159
  .term-line {
 
161
  word-break: break-word;
162
  }
163
 
164
+ /* Base Log Colors */
165
+ .c-start { color: #fff; font-weight: bold; }
166
+ .c-end { color: #fff; font-weight: bold; }
167
  .c-step { color: #39ff14; font-weight: bold; }
168
  .c-thought { color: #5b7a96; font-style: italic; }
169
+ .c-error { color: #ff4a4a; }
170
  .c-muted { color: var(--text-dim); }
171
 
172
  .metric {
 
173
  background: #000;
174
+ padding: 4px;
175
+ }
176
+
177
+ @keyframes pulse-border {
178
+ 0% { border-color: #ff4a4a; box-shadow: 0 0 10px #ff4a4a; }
179
+ 50% { border-color: #2b2b2b; box-shadow: none; }
180
+ 100% { border-color: #ff4a4a; box-shadow: 0 0 10px #ff4a4a; }
181
+ }
182
+ .token-alert > div > input {
183
+ animation: pulse-border 1.5s infinite;
184
  }
185
  """
186
 
 
188
  task = TASK_MAP.get((task_name or "").strip())
189
  if not task:
190
  return (
191
+ "# Waiting for selection...\n"
192
+ "# Tip: Select a target from the Task Selection Grid\n"
 
193
  )
194
  return "\n".join(task.get("code", []))
195
 
 
201
  candidate = f"http://{candidate}"
202
  return candidate.rstrip("/")
203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
def load_code(task_name: str, env_base_url: str) -> str:
    """Return the preview source for ``task_name`` or an error banner.

    Args:
        task_name: Name of the task whose code should be shown.
        env_base_url: Accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        The text produced by ``_code_from_task_name`` unless that text is the
        "Waiting for selection" placeholder, in which case a two-line
        "unable to load" comment banner is returned instead.
    """
    preview = _code_from_task_name(task_name)
    if "Waiting for selection" in preview:
        # The lookup fell back to its placeholder text, i.e. no code was found.
        return (
            "# Unable to load code for the selected task.\n"
            "# Verify OpenEnv API is reachable."
        )
    return preview
212
 
213
  def _solution_from_task_name(task_name: str) -> str | None:
 
221
  for css_class, text in lines:
222
  safe = html.escape(text)
223
  rendered.append(f"<div class='term-line {css_class}'>{safe}</div>")
224
+ content = "\n".join(rendered) if rendered else "<div class='term-line c-muted'>Idle. Configure parameters and run agent.</div>"
225
  return (
226
  "<div id='terminal' class='terminal-wrap'>"
227
  f"{content}"
 
231
  "</script>"
232
  )
233
 
 
 
 
 
 
 
 
 
234
  def _update_hud_badge(task_name: str, difficulty: str) -> str:
235
  if not task_name:
236
+ return "<div style='padding: 6px; color: var(--text-dim); text-align: center; font-size: 14px;'>Waiting for Task Selection...</div>"
237
+ return f"""<div style='padding: 6px; color: #fff; font-weight: 700; font-size: 15px; text-transform: uppercase; text-align: center;'>
238
+ Active Task: {html.escape(task_name)} | Difficulty: {difficulty.capitalize()}
239
+ </div>"""
240
+
241
+ def _large_metric_html(success: bool, score: float, steps: int, reward: str) -> str:
242
+ color = "#39ff14" if success else "#ff4a4a"
243
+ status_text = "SUCCESS" if success else "FAILED"
244
+ return f"""<div style='text-align: center; padding: 10px; border: 1px solid var(--panel-border); background: #000;'>
245
+ <h1 style='color: {color}; margin: 0; font-size: 32px; font-weight: 900;'>{status_text}</h1>
246
+ <h3 style='color: #fff; margin: 4px 0 0 0;'>Score: {score:.2f} | Steps: {steps}</h3>
247
+ <p style='color: var(--text-dim); margin: 4px 0 0 0;'>Rewards: {html.escape(reward)}</p>
248
  </div>"""
249
 
250
  def _reader_thread(stream: Any, source: str, out_q: queue.Queue[tuple[str, str | None]]) -> None:
 
264
  model_name: str,
265
  env_base_url: str,
266
  task_name: str,
 
267
  max_steps: int,
268
  success_score_threshold: float,
 
269
  ) -> dict[str, str]:
270
  env = os.environ.copy()
271
  updates = {
 
274
  "MODEL_NAME": model_name,
275
  "ENV_BASE_URL": _normalize_base_url(env_base_url),
276
  "TASK_NAME": task_name,
 
277
  "MAX_STEPS": str(int(max_steps)),
278
  "SUCCESS_SCORE_THRESHOLD": str(float(success_score_threshold)),
 
279
  }
280
  for key, value in updates.items():
281
  cleaned = (value or "").strip()
 
285
  env.pop(key, None)
286
  return env
287
 
 
 
288
 
289
def sync_tasks(selected, grid_name):
    """Keep the three difficulty radios mutually exclusive and refresh the preview.

    Args:
        selected: Task name emitted by the radio that changed, or a falsy
            value when that radio was cleared.
        grid_name: ``"easy"`` or ``"medium"``; any other value is treated as
            the hard grid.

    Returns:
        A 6-tuple: (selected task, easy radio update, medium radio update,
        hard radio update, HUD badge HTML, code preview text). When
        ``selected`` is falsy every slot is ``gr.skip()`` so nothing changes.
    """
    if not selected:
        # Clearing the sibling radios below re-triggers their change events
        # with an empty value. Ignore those so the selection that was just
        # made (and the stored task name) is not wiped out again.
        return tuple(gr.skip() for _ in range(6))

    difficulty = grid_name if grid_name in ("easy", "medium") else "hard"

    code_content = _code_from_task_name(selected)
    hud_content = _update_hud_badge(selected, difficulty)

    return (
        selected,
        # Only the grid that was clicked keeps its value; the others are cleared.
        gr.update(value=selected if difficulty == "easy" else None),
        gr.update(value=selected if difficulty == "medium" else None),
        gr.update(value=selected if difficulty == "hard" else None),
        hud_content,
        code_content,
    )
318
+
319
def validate_and_start(token):
    """Check that a token was entered before a run is started.

    Args:
        token: Raw text of the token field; ``None``, empty and
            whitespace-only values count as missing.

    Returns:
        A 3-tuple of (token field update, run button update, proceed flag).
        With a token, the ``token-alert`` class is cleared, the button is
        switched to its disabled "RUNNING..." state and the flag is ``True``.
        Without one, the field gets the ``token-alert`` class, the button
        shows an error label and the flag is ``False``.
    """
    has_token = bool(token and token.strip())
    if has_token:
        field_update = gr.update(elem_classes=[])
        button_update = gr.update(value="RUNNING...", elem_id="execute-btn-running", interactive=False)
        return field_update, button_update, True

    # Missing token: highlight the field and report the problem on the button.
    field_update = gr.update(elem_classes=["token-alert"])
    button_update = gr.update(value="ERROR: Token Required")
    return field_update, button_update, False
333
+
334
def _reset_run_state(task_name):
    """Return the initial view contents shown when a run begins.

    Args:
        task_name: Task whose code preview should be displayed.

    Returns:
        A 3-tuple of (code preview text, terminal HTML holding a single
        muted boot line, "Running..." placeholder HTML for the result area).
    """
    preview = _code_from_task_name(task_name)
    boot_terminal = _terminal_html([("c-muted", "Boot sequence initialized...")])
    running_placeholder = "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Running...</div>"
    return (preview, boot_terminal, running_placeholder)
340
 
341
  def run_agent(
342
+ task_name: str,
 
 
343
  hf_token: str,
344
  api_base_url: str,
345
  model_name: str,
346
  env_base_url: str,
 
347
  max_steps: int,
348
  success_score_threshold: float,
 
 
349
  show_thought: bool,
350
+ proceed: bool
351
+ ) -> Generator[tuple[str, str, str, dict], None, None]:
352
 
353
+ if not proceed:
354
+ # User didn't pass auth check
355
+ yield (gr.skip(), gr.skip(), gr.skip(), gr.update(value="Run Debugging Agent", interactive=True))
356
+ return
357
+
358
  code_view = _code_from_task_name(task_name)
359
  terminal_lines: list[tuple[str, str]] = []
360
+ terminal_lines.append(("c-muted", "Agent initialized... infiltrating target."))
361
 
362
+ result_html = "<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting end...</div>"
363
+ yield code_view, _terminal_html(terminal_lines), result_html, gr.update()
 
 
364
 
365
  cmd = [sys.executable, str(INFERENCE_PATH)]
 
 
366
  if show_thought:
367
  cmd.append("--thought")
368
 
369
  env = _build_env(
370
+ hf_token, api_base_url, model_name, env_base_url,
371
+ task_name, max_steps, success_score_threshold
 
 
 
 
 
 
 
372
  )
373
 
374
  process = subprocess.Popen(
 
390
  ended_streams: set[str] = set()
391
  thought_mode = False
392
  active_task_name = task_name
 
393
 
394
  while True:
395
  try:
 
418
  terminal_lines.append(("c-thought", line))
419
  else:
420
  if not show_thought:
 
421
  if not any(tag in line for tag in ["[START]", "[STEP]", "[END]"]):
422
  continue
423
  terminal_lines.append(("c-error", line))
424
  else:
425
  if not show_thought:
426
  if not any(tag in line for tag in ["[START]", "[STEP]", "[END]"]):
427
+ continue
428
 
429
  start_match = START_RE.match(line)
430
  step_match = STEP_RE.match(line)
 
433
  if start_match:
434
  active_task_name = start_match.group("task").strip()
435
  task_preview = _code_from_task_name(active_task_name)
436
+ if "Waiting for selection" not in task_preview:
437
  code_view = task_preview
438
  terminal_lines.append(("c-start", line))
 
 
 
 
439
  elif step_match:
 
 
 
 
440
  err = step_match.group("error")
441
  css = "c-step" if err == "null" else "c-error"
442
  terminal_lines.append((css, line))
 
 
 
 
443
  elif end_match:
444
  success = end_match.group("success") == "true"
445
  final_steps = int(end_match.group("steps"))
446
  score_value = float(end_match.group("score"))
447
  rewards_raw = end_match.group("rewards").strip()
 
448
  terminal_lines.append(("c-end", line))
449
+
450
+ result_html = _large_metric_html(success, score_value, final_steps, rewards_raw or 'none')
451
+
452
  if success:
453
  solved = _solution_from_task_name(active_task_name)
454
  if solved:
455
  code_view = solved
 
 
 
 
 
 
 
 
 
456
  else:
457
  terminal_lines.append(("c-muted", line))
458
 
459
  if len(terminal_lines) > 500:
460
  terminal_lines = terminal_lines[-500:]
461
 
462
+ yield code_view, _terminal_html(terminal_lines), result_html, gr.update()
463
 
464
  return_code = process.wait(timeout=2)
465
  if return_code != 0:
466
  terminal_lines.append(("c-error", f"Process exited with code {return_code}."))
467
+ result_html = _large_metric_html(False, 0.0, 0, f"Error code {return_code}")
 
 
 
468
 
469
  if len(terminal_lines) > 500:
470
  terminal_lines = terminal_lines[-500:]
471
 
472
+ yield code_view, _terminal_html(terminal_lines), result_html, gr.update(value="Run Debugging Agent", elem_id="execute-btn", interactive=True)
473
 
474
 
475
+ with gr.Blocks(title="TraceFix-RL") as demo:
476
  gr.HTML(
477
  f"""
478
  <style>{CSS}</style>
479
  <div id='header-wrap'>
480
+ <h1>TraceFix-RL: Auto SWE OpenEnv RL</h1>
481
+ <p>Professional Autonomous Agent Trace Orchestration.</p>
482
  </div>
483
  """
484
  )
485
+
486
+ selected_task_state = gr.State(value="")
487
 
488
  if hasattr(gr, "Sidebar"):
489
  sidebar_context = gr.Sidebar()
 
491
  sidebar_context = gr.Column()
492
 
493
  with sidebar_context:
494
+ gr.Markdown("### Authentication")
495
+ hf_token = gr.Textbox(label="HF Token", type="password", placeholder="hf_xxx", elem_classes=[])
 
496
 
497
+ with gr.Accordion("Engine Parameters", open=False):
498
  model_name = gr.Textbox(label="Model Name", value=os.getenv("MODEL_NAME", "openai/gpt-oss-20b"))
499
  api_base_url = gr.Textbox(label="API Base URL", value=os.getenv("API_BASE_URL", "https://router.huggingface.co/v1"))
500
  env_base_url = gr.Textbox(label="Env Base URL", value=os.getenv("ENV_BASE_URL", f"http://{BACKEND_HOST}:{BACKEND_PORT}"))
 
 
501
  max_steps = gr.Number(label="Max Steps", value=int(os.getenv("MAX_STEPS", "50")), precision=0)
502
  success_score_threshold = gr.Number(
503
  label="Success Score Threshold",
504
  value=float(os.getenv("SUCCESS_SCORE_THRESHOLD", "0.99")),
505
  precision=2,
506
  )
507
+ show_thought = gr.Checkbox(label="Stream Thought Trace", value=False)
 
508
 
509
+ gr.HTML("<div class='panel-title'>Task Selection Grid</div>")
 
510
  with gr.Row(elem_classes=["panel"]):
511
  easy_radio = gr.Radio(choices=EASY_CHOICES, label="Easy Targets", elem_id="easy-radio")
512
  medium_radio = gr.Radio(choices=MEDIUM_CHOICES, label="Medium Targets", elem_id="medium-radio")
513
  hard_radio = gr.Radio(choices=HARD_CHOICES, label="Hard Targets", elem_id="hard-radio")
514
 
 
515
  hud_badge = gr.HTML(_update_hud_badge("", ""))
516
+ run_button = gr.Button("Run Debugging Agent", elem_id="execute-btn", variant="primary")
517
 
518
+ # Sync tasks correctly
519
+ easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
520
+ medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
521
+ hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view] if 'code_view' in locals() else None)
 
522
 
 
 
 
 
523
 
 
 
 
 
 
 
524
  with gr.Row(equal_height=True):
525
  with gr.Column(scale=1, elem_classes=["panel", "code-panel"]):
526
+ gr.HTML("<div class='panel-title'>Sandbox Source Code</div>")
527
  code_view = gr.Code(
528
  language="python",
529
  interactive=False,
530
  value=_code_from_task_name(""),
 
531
  )
532
+ # Override height via CSS
533
+ gr.HTML("<style>.code-panel .cm-content { height: 45vh; overflow-y: auto; }</style>")
534
 
535
  with gr.Column(scale=1, elem_classes=["panel"]):
536
+ gr.HTML("<div class='panel-title'>Terminal Trace</div>")
537
  terminal = gr.HTML(_terminal_html([]))
538
 
539
+ with gr.Row(elem_classes=["panel"]):
540
+ result_block = gr.HTML("<div style='text-align: center; color: var(--text-dim); padding: 20px;'>Awaiting Execution</div>")
 
 
541
 
542
+ # Due to 'code_view' not being defined above when change was defined, we must re-bind the change events to include code_view.
543
+ easy_radio.change(lambda x: sync_tasks(x, "easy"), inputs=[easy_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
544
+ medium_radio.change(lambda x: sync_tasks(x, "medium"), inputs=[medium_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
545
+ hard_radio.change(lambda x: sync_tasks(x, "hard"), inputs=[hard_radio], outputs=[selected_task_state, easy_radio, medium_radio, hard_radio, hud_badge, code_view])
546
 
547
  # Run Sequence
548
+ run_state = gr.State(value=True)
549
+
550
+ validate_step = run_button.click(
551
+ validate_and_start,
552
+ inputs=[hf_token],
553
+ outputs=[hf_token, run_button, run_state],
554
  queue=False
555
  )
556
 
557
+ reset_step = validate_step.then(
 
558
  _reset_run_state,
559
+ inputs=[selected_task_state],
560
+ outputs=[code_view, terminal, result_block],
561
  queue=False,
562
  )
563
 
564
+ reset_step.then(
 
565
  run_agent,
566
  inputs=[
567
+ selected_task_state,
 
 
568
  hf_token,
569
  api_base_url,
570
  model_name,
571
  env_base_url,
 
572
  max_steps,
573
  success_score_threshold,
 
 
574
  show_thought,
575
+ run_state
576
  ],
577
+ outputs=[code_view, terminal, result_block, run_button],
578
  )