Spaces:

ar9av
/

sql-agent-openenv

Sleeping

App Files Community

ar9avg committed 12 days ago

Commit

4ec680a

1 Parent(s): b00a200

fix

Browse files

Files changed (1) hide show

backend/api/demo.py +69 -56

backend/api/demo.py CHANGED Viewed

@@ -176,73 +176,69 @@ async def execute_query_stream(req: ExecuteQueryRequest):
         # Initial generate action
         action = Action(repair_action="generate")
         for attempt in range(1, max_attempts + 1):
             yield {"data": json.dumps({"type": "attempt_start", "attempt": attempt})}
             ep = env._episode  # type: ignore[union-attr]
             ep.attempt_number = attempt
-            # Generate SQL with streaming
-            from env.sql_env import _make_client, _MODEL
-            from openai import AsyncOpenAI
-            if attempt == 1 or ep.current_sql is None:
-                system_prompt = BASE_SYSTEM_PROMPT
-                # Include previous wrong SQL if user retried after marking wrong
-                prev_context = ""
-                if req.previousSql:
-                    prev_context = (
-                        f"\nNOTE: A previous attempt generated the following SQL which was marked INCORRECT:\n"
-                        f"```sql\n{req.previousSql}\n```\n"
-                        f"You MUST try a completely different approach.\n"
-                    )
-                user_msg = (
-                    f"Schema:\n{obs.schema_info}\n\nQuestion: {req.question}\n"
-                    f"{prev_context}\n"
-                    "Write a SQL query to answer this question."
-                )
-            else:
-                from rl.repair_strategies import RepairContext, get_repair_system_suffix, build_repair_user_message
-                # Bandit selects action
-                if ep.current_features is not None:
-                    repair_enum, scores = env._bandit.select_action(ep.current_features)
-                    ucb_scores = {
-                        REPAIR_ACTION_NAMES[RepairAction(i)]: round(scores[i], 4)
-                        for i in range(len(scores))
-                    }
-                    action = Action(repair_action=REPAIR_ACTION_NAMES[repair_enum])
-                    yield {"data": json.dumps({
-                        "type": "rl_action",
-                        "action": action.repair_action,
-                        "ucb_scores": ucb_scores,
-                    })}
-                else:
-                    repair_enum = RepairAction.REWRITE_FULL
-                    action = Action(repair_action="rewrite_full")
-                suffix = get_repair_system_suffix(repair_enum)
-                offending = extract_offending_token(ep.error_message or "")
-                ctx = RepairContext(
-                    schema=obs.schema_info,
-                    question=req.question,
-                    failing_sql=ep.current_sql or "",
-                    error_message=ep.error_message or "",
-                    offending_token=offending,
-                )
-                system_prompt = BASE_SYSTEM_PROMPT + suffix
-                user_msg = build_repair_user_message(repair_enum, ctx)
-            # Stream SQL generation
             client = _make_client()
             chunks: list[str] = []
             try:
                 stream = await client.chat.completions.create(
                     model=_MODEL,
-                    messages=[
-                        {"role": "system", "content": system_prompt},
-                        {"role": "user", "content": user_msg},
-                    ],
                     stream=True,
                     temperature=0.1,
                 )
@@ -385,6 +381,23 @@ async def execute_query_stream(req: ExecuteQueryRequest):
                 })}
                 done = True
                 break
         total_reward = compute_episode_reward(all_step_rewards, success)

         # Initial generate action
         action = Action(repair_action="generate")
+        from env.sql_env import _make_client, _MODEL
+        from rl.repair_strategies import RepairContext, get_repair_system_suffix, build_repair_user_message
+        # Build initial user message (includes previous-wrong-SQL context if retrying)
+        prev_context = ""
+        if req.previousSql:
+            prev_context = (
+                f"\nNOTE: A previous session generated the following SQL which was marked INCORRECT:\n"
+                f"```sql\n{req.previousSql}\n```\n"
+                f"You MUST try a completely different approach.\n"
+            )
+        initial_user_msg = (
+            f"Schema:\n{obs.schema_info}\n\nQuestion: {req.question}\n"
+            f"{prev_context}\n"
+            "Write a SQL query to answer this question."
+        )
+        # Multi-turn conversation — grows with each failed attempt so the LLM
+        # sees its own history and doesn't repeat the same mistake.
+        conversation: list[dict] = [
+            {"role": "system", "content": BASE_SYSTEM_PROMPT},
+            {"role": "user", "content": initial_user_msg},
+        ]
         for attempt in range(1, max_attempts + 1):
             yield {"data": json.dumps({"type": "attempt_start", "attempt": attempt})}
             ep = env._episode  # type: ignore[union-attr]
             ep.attempt_number = attempt
+            # On repair attempts, update system prompt with RL-selected repair suffix
+            if attempt > 1 and ep.current_features is not None:
+                repair_enum, scores = env._bandit.select_action(ep.current_features)
+                ucb_scores = {
+                    REPAIR_ACTION_NAMES[RepairAction(i)]: round(scores[i], 4)
+                    for i in range(len(scores))
+                }
+                action = Action(repair_action=REPAIR_ACTION_NAMES[repair_enum])
+                yield {"data": json.dumps({
+                    "type": "rl_action",
+                    "action": action.repair_action,
+                    "ucb_scores": ucb_scores,
+                })}
+                # Update system prompt with repair-specific guidance
+                conversation[0] = {
+                    "role": "system",
+                    "content": BASE_SYSTEM_PROMPT + get_repair_system_suffix(repair_enum),
+                }
+            elif attempt > 1:
+                repair_enum = RepairAction.REWRITE_FULL
+                action = Action(repair_action="rewrite_full")
+                conversation[0] = {
+                    "role": "system",
+                    "content": BASE_SYSTEM_PROMPT + get_repair_system_suffix(repair_enum),
+                }
+            # Stream SQL generation using the full conversation history
             client = _make_client()
             chunks: list[str] = []
             try:
                 stream = await client.chat.completions.create(
                     model=_MODEL,
+                    messages=conversation,
                     stream=True,
                     temperature=0.1,
                 )
                 })}
                 done = True
                 break
+            else:
+                # Append failed attempt to conversation so the next attempt has full history.
+                # This prevents the LLM from repeating the same mistake on subsequent tries.
+                conversation.append({"role": "assistant", "content": generated_sql})
+                if error:
+                    offending = extract_offending_token(error)
+                    feedback_msg = (
+                        f"That SQL failed with this error:\n{error}\n"
+                        + (f"Problematic token: '{offending}'\n" if offending else "")
+                        + "Please fix the SQL. Do NOT repeat the same mistake."
+                    )
+                else:
+                    feedback_msg = (
+                        "That SQL ran but returned incorrect or empty results. "
+                        "Please try a completely different approach."
+                    )
+                conversation.append({"role": "user", "content": feedback_msg})
         total_reward = compute_episode_reward(all_step_rewards, success)