Prithvigg committed on
Commit
a01e90d
·
verified ·
1 Parent(s): 3867c62

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. demo.py +187 -0
  2. server/queryforge_environment.py +0 -1
demo.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ QueryForge — Gradio Demo
3
+ ========================
4
+ Interactive SQL debugger UI that runs the environment in-process (no server needed).
5
+
6
+ Run locally:
7
+ python demo.py
8
+ # opens http://localhost:7860
9
+
10
+ On HF Spaces:
11
+ Set ANTHROPIC_API_KEY secret in Space settings for AI judging (optional).
12
+ The demo auto-detects it.
13
+ """
14
+
15
+ import os
16
+ import sys
17
+
18
+ import gradio as gr
19
+
20
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
21
+
22
+ from models import SQLAction
23
+ from server.queryforge_environment import QueryforgeEnvironment
24
+ from tasks import REGISTRY
25
+
26
+ # ── Helpers ───────────────────────────────────────────────────────────────────
27
+
28
# True only when ANTHROPIC_API_KEY is present and non-empty in the environment.
AI_JUDGE_ACTIVE = os.environ.get("ANTHROPIC_API_KEY") not in (None, "")

# (label, value) pairs for the task dropdown: the label shows the upper-cased
# level and the title, the value is the task id.
TASK_CHOICES = list(
    map(
        lambda task: ("[{}] {}".format(task.level.upper(), task.title), task.id),
        REGISTRY.list_all(),
    )
)
34
+
35
+
36
def _score_html(score: float, done: bool) -> str:
    """Render a score as a 20-cell coloured progress bar inside an HTML ``<div>``.

    Args:
        score: Reward for the latest attempt, nominally in ``[0.0, 1.0]``.
        done: Whether the episode has ended.

    Returns:
        An HTML snippet with the bar, the score formatted to three decimals
        and, once the episode is over, a "Solved!" (score >= 0.9) or "Ended"
        suffix.
    """
    bar_width = 20
    # Clamp the number of filled cells: a negative count would yield an empty
    # string while making the unfilled part longer than the bar, and a count
    # above ``bar_width`` would overflow it.
    filled = max(0, min(bar_width, int(score * bar_width)))
    bar = "█" * filled + "░" * (bar_width - filled)

    # Green for (near-)solved, amber for partial credit, red otherwise.
    if score >= 0.9:
        color = "#22c55e"
    elif score >= 0.5:
        color = "#f59e0b"
    else:
        color = "#ef4444"

    if not done:
        suffix = ""
    elif score >= 0.9:
        suffix = " ✓ Solved!"
    else:
        suffix = " ⏹ Ended"

    return (
        f'<div style="font-family:monospace;font-size:1.1rem;">'
        f'<span style="color:{color}">[{bar}]</span> '
        f'<b>{score:.3f}</b>{suffix}</div>'
    )
46
+
47
+
48
+ # ── Callbacks ─────────────────────────────────────────────────────────────────
49
+
50
def load_task(task_id: str):
    """Start a fresh episode for ``task_id`` and return the initial UI values.

    A new ``QueryforgeEnvironment`` is created and reset for the task, and the
    task itself is looked up in ``REGISTRY`` to obtain its broken query.

    Returns:
        A 7-tuple: task description, broken query (pre-filled into the SQL
        editor), placeholder score HTML, empty feedback, empty history rows,
        the new session state dict, and an update enabling the submit button.
    """
    environment = QueryforgeEnvironment()
    first_obs = environment.reset(task_id=task_id)
    chosen = REGISTRY.get(task_id)

    # Session state carried between callbacks: the live environment, the
    # attempt rows shown in the history table, and the episode-finished flag.
    session = dict(env=environment, history=[], done=False)

    score_placeholder = (
        "<div style='color:#6b7280;font-family:monospace'>"
        "Submit a query to see your score.</div>"
    )
    outputs = [
        first_obs.task_description,
        chosen.broken_query,
        score_placeholder,
        "",
        [],
        session,
        gr.update(interactive=True),
    ]
    return tuple(outputs)
65
+
66
+
67
def _format_feedback(feedback, hint, done):
    """Convert grader feedback made of ``[LABEL] text`` sections into Markdown."""
    import re  # local import: ``re`` is not among the module-level imports

    paragraphs = []
    # A new section starts wherever whitespace is followed by "[", so the text
    # inside a section is never broken up word by word.
    for section in re.split(r"\s+(?=\[)", (feedback or "").strip()):
        if not section:
            continue
        label, closing, body = section.partition("]")
        if section.startswith("[") and closing:
            paragraphs.append(f"**{label.lstrip('[').strip()}**{body}")
        else:
            # Unlabelled text is passed through once, unchanged.
            paragraphs.append(section)

    markdown = "\n\n".join(paragraphs)
    if hint and not done:
        markdown += f"\n\n> 💡 **Hint:** {hint}"
    return markdown


def submit_query(sql: str, state: dict):
    """Grade the submitted SQL and return values for the output components.

    Args:
        sql: Text from the SQL editor; ``None`` is treated as an empty query.
        state: Session dict created by ``load_task`` (keys ``env``,
            ``history``, ``done``), or ``None`` if no task was loaded.

    Returns:
        A 4-tuple: score HTML, feedback Markdown, history rows, and the
        (mutated) state. When no task is loaded or the episode has already
        ended, the first element carries an explanatory message instead and
        the environment is not stepped.
    """
    if state is None or "env" not in state:
        return (
            "<div style='color:red'>Load a task first.</div>",
            "", [], state,
        )
    if state.get("done"):
        return (
            "<div style='color:#6b7280'>Episode already ended. Load a new task.</div>",
            "", state["history"], state,
        )

    env = state["env"]
    obs = env.step(SQLAction(sql=(sql or "").strip()))
    score = obs.reward or 0.0  # a missing reward counts as zero

    # ── Score HTML ────────────────────────────────────────────────────────────
    score_html = _score_html(score, obs.done)

    # ── Feedback (split into labelled sections) ───────────────────────────────
    feedback_md = _format_feedback(obs.feedback, obs.hint, obs.done)

    # ── History table ─────────────────────────────────────────────────────────
    if obs.done and score >= 0.9:
        status = "✓ Solved"
    elif obs.done:
        status = "⏹ Ended"
    else:
        status = "↻ Retry"
    state["history"].append([obs.attempt, f"{score:.3f}", obs.rows_returned, status])

    state["done"] = obs.done

    return score_html, feedback_md, state["history"], state
101
+
102
+
103
+ # ── UI layout ─────────────────────────────────────────────────────────────────
104
+
105
# Markdown shown at the top of the page. The status line tells the user
# whether the AI judge is available, based on AI_JUDGE_ACTIVE.
HEADER = """
# 🔧 QueryForge — SQL Debugger & Optimiser

Fix broken or slow SQL queries and get instant graded feedback.
{ai_status}
""".format(
    ai_status=(
        "🟢 **AI Judge active** — scores up to 1.0 (Anthropic)"
        if AI_JUDGE_ACTIVE else
        "🟡 **Deterministic mode** — max score 0.80 (set `ANTHROPIC_API_KEY` to enable AI judge)"
    )
)
117
+
118
# Build the page: task picker, description + SQL editor, score/feedback
# panels and the attempt history, then wire the two buttons to the callbacks.
with gr.Blocks(title="QueryForge", theme=gr.themes.Soft()) as demo:

    # Holds the dict returned by load_task (env, history, done); None until
    # a task has been loaded.
    state = gr.State(None)

    gr.Markdown(HEADER)

    # ── Task selection row ────────────────────────────────────────────────────
    with gr.Row():
        task_dd = gr.Dropdown(
            choices=TASK_CHOICES,
            # An empty registry must not crash the module at import time.
            value=TASK_CHOICES[0][1] if TASK_CHOICES else None,
            label="Select Task",
            scale=4,
        )
        load_btn = gr.Button("Load Task ▶", variant="primary", scale=1)

    # ── Main two-column layout ────────────────────────────────────────────────
    with gr.Row():
        with gr.Column(scale=1):
            task_desc = gr.Textbox(
                label="📋 Task Description",
                lines=18,
                interactive=False,
                placeholder="Load a task to see the description and broken query…",
            )

        with gr.Column(scale=1):
            sql_input = gr.Code(
                label="✏️ Your SQL Query",
                language="sql",
                lines=12,
            )
            # Disabled until load_task enables it via gr.update(interactive=True).
            submit_btn = gr.Button(
                "Submit Query ⚡",
                variant="primary",
                interactive=False,
            )

    # ── Score + feedback ──────────────────────────────────────────────────────
    score_html = gr.HTML(
        value="<div style='color:#6b7280;font-family:monospace'>Submit a query to see your score.</div>",
        label="Score",
    )
    feedback_display = gr.Markdown(label="Feedback")

    # ── Attempt history ───────────────────────────────────────────────────────
    history_table = gr.Dataframe(
        headers=["Step", "Score", "Rows Returned", "Status"],
        datatype=["number", "str", "number", "str"],
        label="📊 Attempt History",
        interactive=False,
        wrap=True,
    )

    # ── Wire up events ────────────────────────────────────────────────────────
    # load_task returns seven values, one per output component listed here.
    load_btn.click(
        load_task,
        inputs=[task_dd],
        outputs=[task_desc, sql_input, score_html, feedback_display, history_table, state, submit_btn],
    )

    # submit_query returns four values, one per output component listed here.
    submit_btn.click(
        submit_query,
        inputs=[sql_input, state],
        outputs=[score_html, feedback_display, history_table, state],
    )
184
+
185
+
186
if __name__ == "__main__":
    # Serve on all interfaces at port 7860, without a public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)
server/queryforge_environment.py CHANGED
@@ -40,7 +40,6 @@ except ImportError:
40
  logger = logging.getLogger(__name__)
41
  _AI_JUDGE_ACTIVE = bool(os.environ.get("ANTHROPIC_API_KEY"))
42
 
43
- print("here", os.environ.get("ANTHROPIC_API_KEY"))
44
  logger.info(
45
  "QueryForge environment loaded | AI judge: %s | done_threshold: %s",
46
  "ACTIVE (scores up to 1.0)" if _AI_JUDGE_ACTIVE else "OFFLINE β€” deterministic only (max score 0.80)",
 
40
  logger = logging.getLogger(__name__)
41
  _AI_JUDGE_ACTIVE = bool(os.environ.get("ANTHROPIC_API_KEY"))
42
 
 
43
  logger.info(
44
  "QueryForge environment loaded | AI judge: %s | done_threshold: %s",
45
  "ACTIVE (scores up to 1.0)" if _AI_JUDGE_ACTIVE else "OFFLINE β€” deterministic only (max score 0.80)",