Spaces:
Sleeping
Sleeping
| import json | |
| from typing import Any, Callable | |
| import gradio as gr | |
| def create_custom_ui(env: Any, action_cls: Any, observation_cls: Any) -> gr.Blocks: | |
| # A dark, cyber-security focused theme mapping closely to Voyager's, | |
| # but tailored for PrivilegeDesk (blues, purples, neon green). | |
| theme = gr.themes.Soft( | |
| primary_hue="indigo", | |
| secondary_hue="blue", | |
| neutral_hue="slate", | |
| ).set( | |
| body_background_fill="*neutral_950", | |
| body_background_fill_dark="*neutral_950", | |
| block_background_fill="*neutral_900", | |
| block_background_fill_dark="*neutral_900", | |
| block_border_color="*neutral_800", | |
| block_border_color_dark="*neutral_800", | |
| block_label_background_fill="*neutral_800", | |
| button_primary_background_fill="*primary_600", | |
| button_primary_background_fill_dark="*primary_600", | |
| button_secondary_background_fill="*primary_800", | |
| button_secondary_background_fill_dark="*primary_800", | |
| border_color_primary="*neutral_800", | |
| background_fill_secondary="*neutral_900", | |
| color_accent="*primary_500", | |
| ) | |
| custom_css = """ | |
| /* VOYAGER Premium Styling Clones */ | |
| .gradio-container { | |
| font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif; | |
| } | |
| #header-container { | |
| text-align: center; | |
| margin-bottom: 2rem; | |
| padding-top: 2rem; | |
| } | |
| #header-title { | |
| font-size: 2.5rem; | |
| font-weight: 800; | |
| background: linear-gradient(90deg, #818cf8, #3b82f6); | |
| -webkit-background-clip: text; | |
| -webkit-text-fill-color: transparent; | |
| margin-bottom: 0.5rem; | |
| display: flex; | |
| justify-content: center; | |
| align-items: center; | |
| gap: 12px; | |
| } | |
| #header-subtitle { | |
| color: #94a3b8; | |
| font-size: 1.1rem; | |
| font-weight: 400; | |
| } | |
| /* Stats Row styling */ | |
| .stat-box { | |
| background: #1e293b; | |
| border-radius: 8px; | |
| padding: 1.25rem; | |
| text-align: center; | |
| border: 1px solid #334155; | |
| box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1); | |
| } | |
| .stat-number { | |
| font-size: 1.8rem; | |
| font-weight: 700; | |
| color: #818cf8; | |
| margin-bottom: 0.25rem; | |
| } | |
| .stat-label { | |
| font-size: 0.75rem; | |
| font-weight: 600; | |
| color: #64748b; | |
| text-transform: uppercase; | |
| letter-spacing: 0.05em; | |
| } | |
| /* Episode Log Console */ | |
| #episode-log { | |
| background-color: #0f172a; | |
| color: #a78bfa; | |
| font-family: 'JetBrains Mono', 'Fira Code', monospace; | |
| padding: 1rem; | |
| border: 1px solid #1e293b; | |
| border-radius: 8px; | |
| height: 250px; | |
| overflow-y: auto; | |
| } | |
| #episode-log.yellow-text { color: #facc15; } | |
| /* Tool blocks styling */ | |
| .tool-group { | |
| background-color: #1e293b; | |
| border-radius: 6px; | |
| padding: 10px; | |
| margin-bottom: 10px; | |
| border-left: 3px solid #818cf8; | |
| } | |
| .tool-group h4 { | |
| color: #94a3b8; | |
| margin-top: 0; | |
| margin-bottom: 8px; | |
| font-size: 0.85rem; | |
| text-transform: uppercase; | |
| } | |
| .tool-tag { | |
| display: inline-block; | |
| background: #0f172a; | |
| color: #38bdf8; | |
| padding: 3px 8px; | |
| border-radius: 4px; | |
| font-size: 0.8rem; | |
| margin: 2px; | |
| font-family: 'JetBrains Mono', monospace; | |
| } | |
| """ | |
| with gr.Blocks(theme=theme, css=custom_css, title="PrivilegeDesk - OpenEnv") as demo: | |
| # ─── HEADER ─────────────────────────────────────────────────────────── | |
| with gr.Column(elem_id="header-container"): | |
| gr.HTML(""" | |
| <div id="header-title"> | |
| <svg width="40" height="40" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shield-check"><path d="M12 22s8-4 8-10V5l-8-3-8 3v7c0 6 8 10 8 10z"/><path d="m9 12 2 2 4-4"/></svg> | |
| PrivilegeDesk Environment | |
| </div> | |
| <div id="header-subtitle">Interactive Simulated IAM & Privilege Management Environment for RL Agents</div> | |
| """) | |
| # ─── STATS ROW ──────────────────────────────────────────────────────── | |
| with gr.Row(): | |
def make_stat_box(val, label):
    """Return the HTML fragment for a single statistics tile.

    Args:
        val: Value shown in the large ``stat-number`` line; it is formatted
            into the markup as-is (no escaping).
        label: Caption shown in the small ``stat-label`` line; also
            inserted as-is.

    Returns:
        A one-line ``<div class="stat-box">`` string wrapping the number
        and label divs.
    """
    number_html = '<div class="stat-number">{}</div>'.format(val)
    label_html = '<div class="stat-label">{}</div>'.format(label)
    return '<div class="stat-box">' + number_html + label_html + "</div>"
| gr.HTML(make_stat_box("19", "Tools")) | |
| gr.HTML(make_stat_box("6", "Categories")) | |
| gr.HTML(make_stat_box("18", "Reward Signals")) | |
| gr.HTML(make_stat_box("3", "Difficulty Levels")) | |
| gr.HTML(make_stat_box("3", "Tasks (Easy, Med, Hard)")) | |
| # We use state to hold cumulative reward / step | |
| step_disp = gr.HTML(make_stat_box("-", "Current Step")) | |
| reward_disp = gr.HTML(make_stat_box("-", "Cum. Reward")) | |
| # ─── MAIN INTERFACE ─────────────────────────────────────────────────── | |
| with gr.Row(): | |
| # LEFT COLUMN: Interactive Demo | |
| with gr.Column(scale=2): | |
| gr.Markdown("### 🎮 Interactive Demo – Try the Environment") | |
| with gr.Row(): | |
| task_dropdown = gr.Dropdown( | |
| choices=["access_decision", "jit_escalation", "access_review"], | |
| value="access_decision", | |
| label="Task", | |
| container=False, | |
| scale=3 | |
| ) | |
| difficulty_dropdown = gr.Dropdown( | |
| choices=["Difficulty 1", "Difficulty 2", "Difficulty 3"], | |
| value="Difficulty 1", | |
| label="Difficulty", | |
| container=False, | |
| scale=2 | |
| ) | |
| seed_input = gr.Number(value=42, label="Seed", precision=0, container=False, scale=1) | |
| reset_btn = gr.Button("🔄 Reset Episode", variant="huggingface") # Blue/Indigo variant | |
| step_btn = gr.Button("▶ Step", variant="primary") # Green accent variant | |
| with gr.Row(): | |
| tool_dropdown = gr.Dropdown( | |
| choices=[ | |
| "policy.lookup", "policy.list", | |
| "org.get_user", "org.get_manager", "org.list_users", | |
| "entitlement.list", "entitlement.inspect", "entitlement.revoke", | |
| "request.view", "request.list", | |
| "approval.route", "approval.check_status", | |
| "access.decide", "access.grant", "access.set_ttl", | |
| "audit.query", "group.resolve", "workflow.check_active", "review.submit" | |
| ], | |
| label="Tool Call", | |
| scale=2 | |
| ) | |
| last_result = gr.Textbox(label="Last Tool Result", interactive=False, lines=1) | |
| args_input = gr.Textbox(label="Arguments (JSON)", value="{}", lines=2) | |
| gr.Markdown("**Episode Log**") | |
| episode_log = gr.HTML('<div id="episode-log" class="yellow-text">Press "Reset Episode" to generate a scenario...</div>') | |
| # RIGHT COLUMN: Rewards & Overview | |
| with gr.Column(scale=1): | |
| gr.Markdown("### 🧠 PrivilegeDesk Agent Loop") | |
| gr.HTML(""" | |
| <div style="background: #1e293b; padding: 15px; border-radius: 8px; border: 1px solid #334155;"> | |
| <div style="display: flex; justify-content: space-between; margin-bottom: 20px;"> | |
| <span style="background: #312e81; color: #a5b4fc; padding: 5px 10px; border-radius: 5px;">Actor<br/><small>LLM generates</small></span> | |
| <span style="color: #cbd5e1; align-self: center;">→</span> | |
| <span style="background: #0284c7; color: #bae6fd; padding: 5px 10px; border-radius: 5px;">Execute<br/><small>Tool call</small></span> | |
| <span style="color: #cbd5e1; align-self: center;">→</span> | |
| <span style="background: #065f46; color: #a7f3d0; padding: 5px 10px; border-radius: 5px;">Reflect<br/><small>Update memory</small></span> | |
| </div> | |
| <ul style="color: #cbd5e1; font-size: 0.9rem; padding-left: 20px;"> | |
| <li><b>Goal Inference</b> – Read request/audit logs</li> | |
| <li><b>Policy Verification</b> – Cross-reference IAM rules</li> | |
| <li><b>Action Execution</b> – Approve/Deny/Revoke</li> | |
| </ul> | |
| </div> | |
| """) | |
| gr.Markdown("### 🎯 Grading Rubrics (Subgoals)") | |
| gr.HTML(""" | |
| <div style="background: #1e293b; padding: 15px; border-radius: 8px; border: 1px solid #334155; font-size: 0.85em; color: #cbd5e1;"> | |
| <div style="margin-bottom: 10px;"><b>Task 1: Access Decision (Easy)</b><br/>View Req → Check Entitlements → Policy Lookup → Decide</div> | |
| <div style="margin-bottom: 10px;"><b>Task 2: JIT Escalation (Medium)</b><br/>Route Approval → Attach Ticket → Set TTL → Grant</div> | |
| <div><b>Task 3: Access Review (Hard)</b><br/>Audit Usage → Resolve Groups → Test Workflows → Revoke Risks</div> | |
| </div> | |
| """) | |
| # ─── TOOL INVENTORY ─────────────────────────────────────────────────── | |
| gr.Markdown("### 🔧 Tool Inventory – 19 Tools") | |
| gr.HTML(""" | |
| <div style="background: #1e293b; padding: 20px; border-radius: 8px; border: 1px solid #334155;"> | |
| <div class="tool-group"> | |
| <h4>POLICY (2)</h4> | |
| <span class="tool-tag">policy.lookup</span> <span class="tool-tag">policy.list</span> | |
| </div> | |
| <div class="tool-group"> | |
| <h4>ORGANIZATION (3)</h4> | |
| <span class="tool-tag">org.get_user</span> <span class="tool-tag">org.get_manager</span> <span class="tool-tag">org.list_users</span> | |
| </div> | |
| <div class="tool-group"> | |
| <h4>ENTITLEMENTS & GROUPS (4)</h4> | |
| <span class="tool-tag">entitlement.list</span> <span class="tool-tag">entitlement.inspect</span> <span class="tool-tag">entitlement.revoke</span> <span class="tool-tag">group.resolve</span> | |
| </div> | |
| <div class="tool-group"> | |
| <h4>REQUEST & APPROVAL (4)</h4> | |
| <span class="tool-tag">request.view</span> <span class="tool-tag">request.list</span> <span class="tool-tag">approval.route</span> <span class="tool-tag">approval.check_status</span> | |
| </div> | |
| <div class="tool-group"> | |
| <h4>ACCESS CONTROL (3)</h4> | |
| <span class="tool-tag">access.decide</span> <span class="tool-tag">access.grant</span> <span class="tool-tag">access.set_ttl</span> | |
| </div> | |
| <div class="tool-group"> | |
| <h4>AUDIT & WORKFLOW (3)</h4> | |
| <span class="tool-tag">audit.query</span> <span class="tool-tag">workflow.check_active</span> <span class="tool-tag">review.submit</span> | |
| </div> | |
| </div> | |
| """) | |
| # State definitions | |
| # UI-side mirrors of the episode log, step count and cumulative reward. | |
| # The environment itself lives in the FastAPI server; the handlers below call it over HTTP and update these mirrors to stay in sync. | |
| state_log = gr.State([]) | |
| state_steps = gr.State(0) | |
| state_reward = gr.State(0.0) | |
| # ─── LOGIC BINDINGS ─────────────────────────────────────────────────── | |
def format_log(logs):
    """Render the episode log entries as the HTML console block.

    Each entry becomes one ``<div>`` row prefixed with a ``>`` prompt.
    Entries are HTML-escaped because they carry text that is not under the
    UI's control (JSON observations, raw server error bodies, the
    user-typed argument string); unescaped, any ``<`` or ``&`` in them
    would be parsed as markup.

    Args:
        logs: Iterable of log entries (converted with ``str``). ``None`` or
            an empty iterable yields the placeholder text.

    Returns:
        A ``<div id="episode-log">`` string containing the rows, or the
        "Waiting for episode start..." placeholder when there are none.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import html

    # quote=False: rows are element text, not attribute values, so quotes
    # (ubiquitous in the JSON payloads) can stay literal.
    rows = "".join(
        f"<div>&gt; {html.escape(str(entry), quote=False)}</div>"
        for entry in (logs or ())
    )
    if not rows:
        rows = "Waiting for episode start..."
    return f'<div id="episode-log">{rows}</div>'
def on_reset(task, diff, seed):
    """Start a new episode through the local ``/reset`` endpoint.

    Args:
        task: Task identifier sent as ``task_id``.
        diff: Difficulty label of the form ``"Difficulty N"``; ``N`` is sent
            as ``difficulty_level``.
        seed: Episode seed. ``None`` (an emptied number field) falls back to
            42; every other value, including 0, is sent as ``int(seed)``.

    Returns:
        A 7-tuple: log HTML, log list, status text, step tile HTML, reward
        tile HTML, step count, cumulative reward. On any failure (network
        error, non-200 status, unparsable body) the log holds a single
        error entry, the tiles show "-", and the counters are reset to 0.
    """
    import requests  # we will call our own endpoints locally

    def failed(message):
        # Keep the error in the stored log too, so it is still visible
        # after the next step appends to it.
        logs = [message]
        return (
            format_log(logs),
            logs,
            "Failed to reset.",
            make_stat_box("-", "Current Step"),
            make_stat_box("-", "Cum. Reward"),
            0,
            0.0,
        )

    payload = {
        "task_id": task,
        "difficulty_level": int(diff.split(" ")[1]),
        # Compare against None explicitly: a plain truthiness test would
        # silently replace a legitimate seed of 0 with 42.
        "seed": 42 if seed is None else int(seed),
    }

    try:
        # Without a timeout an unresponsive server would block the UI
        # handler indefinitely.
        res = requests.post("http://127.0.0.1:8000/reset", json=payload, timeout=30)
    except requests.RequestException as exc:
        return failed(f"Error resetting: {exc}")

    if res.status_code != 200:
        return failed(f"Error resetting: {res.text}")

    try:
        data = res.json()
    except ValueError as exc:
        return failed(f"Error resetting: invalid JSON response ({exc})")

    init_obs = json.dumps(data.get("observation", {}), indent=2)
    logs = [f"Episode reset. Task: {task}", f"Observation: {init_obs}"]
    return (
        format_log(logs),
        logs,
        "Env reset successful.",
        make_stat_box("0", "Current Step"),
        make_stat_box("0.0", "Cum. Reward"),
        0,
        0.0,
    )
def on_step(tool_name, args_str, current_logs, current_steps, current_reward):
    """Send one tool call to the local ``/step`` endpoint and update the UI.

    Args:
        tool_name: Name of the tool to invoke; empty/``None`` (nothing
            selected in the dropdown) is rejected before any request.
        args_str: Tool arguments as a JSON string.
        current_logs: Log entries accumulated so far (not mutated).
        current_steps: Number of successful steps so far.
        current_reward: Cumulative reward so far.

    Returns:
        A 7-tuple: log HTML, log list, status text, step tile HTML, reward
        tile HTML, step count, cumulative reward. When the step is rejected
        or fails, an error entry is appended to the log and the step count
        and reward are returned unchanged.
    """
    import requests

    def render(logs, status, steps, reward):
        # Single place that builds the output tuple so every exit path
        # returns the same shape.
        return (
            format_log(logs),
            logs,
            status,
            make_stat_box(str(steps), "Current Step"),
            make_stat_box(f"{reward:.2f}", "Cum. Reward"),
            steps,
            reward,
        )

    def rejected(message, status):
        return render(current_logs + [message], status, current_steps, current_reward)

    if not tool_name:
        return rejected("Error: Select a tool before stepping", "No tool selected")

    try:
        args = json.loads(args_str)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers a
        # non-string value such as None.
        return rejected("Error: Arguments must be valid JSON", "Invalid JSON")

    try:
        res = requests.post(
            "http://127.0.0.1:8000/step",
            json={"action": {"tool_name": tool_name, "arguments": args}},
            timeout=30,
        )
    except requests.RequestException as exc:
        return rejected(f"Error on step: {exc}", "Step Failed")

    if res.status_code != 200:
        return rejected(f"Error on step: {res.text}", "Step Failed")

    try:
        data = res.json()
    except ValueError as exc:
        return rejected(f"Error on step: invalid JSON response ({exc})", "Step Failed")

    obs = json.dumps(data.get("observation", {}), indent=2)
    # A JSON null reward would otherwise make the addition below raise.
    rew = data.get("reward") or 0.0
    done = data.get("done", False)

    step_val = current_steps + 1
    rew_val = current_reward + rew

    obs_preview = obs[:300] + ("..." if len(obs) > 300 else "")
    new_logs = current_logs + [
        f"Action: {tool_name}({args_str})",
        f"Reward: {rew} | Terminated: {done}",
        f"Observation: {obs_preview}",
    ]

    if done:
        # fetch grader; a grader failure must not discard the completed step
        try:
            grade_res = requests.post("http://127.0.0.1:8000/grader", timeout=30)
            if grade_res.status_code == 200:
                score = grade_res.json().get("score", 0.0)
                new_logs.append(f"Episode Done. Final Score: {score}")
        except (requests.RequestException, ValueError) as exc:
            new_logs.append(f"Episode Done. Grader unavailable: {exc}")

    return render(new_logs, "Step successful.", step_val, rew_val)
| reset_btn.click( | |
| on_reset, | |
| inputs=[task_dropdown, difficulty_dropdown, seed_input], | |
| outputs=[episode_log, state_log, last_result, step_disp, reward_disp, state_steps, state_reward] | |
| ) | |
| step_btn.click( | |
| on_step, | |
| inputs=[tool_dropdown, args_input, state_log, state_steps, state_reward], | |
| outputs=[episode_log, state_log, last_result, step_disp, reward_disp, state_steps, state_reward] | |
| ) | |
| return demo | |