Final_Assignment_Template

Paused

App Files Community

darrenphodgson76 committed 24 days ago

Commit

63d6133

verified ·

1 Parent(s): 2ae2c93

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -37

app.py CHANGED Viewed

@@ -25,55 +25,59 @@ def summarize_query(query: str) -> str:
 search_tool = DuckDuckGoSearchTool()
-# --- ReACT + Scratchpad + Retry Prompt ---
-system_prompt = """
 You are a ReACT agent with scratchpad memory and a retry mechanism.
 1. Thought: Figure out what's needed.
 2. Action: (Optional) Call a tool with a precise query.
 3. Observation: Record tool output.
-If the first Observation is empty/irrelevant:
-4. Thought: Unclear result, reframe and retry.
-5. Action: summarize_query(original query).
 6. Action: DuckDuckGoSearchTool(reframed query).
 7. Observation: Record new result.
 Then:
-8. Thought: Reflect using all observations.
 9. FINAL ANSWER: Provide your answer.
-Formatting:
-- Start with FINAL ANSWER: [your answer]
-- Numbers plain (no commas unless list)
-- Strings no articles unless part of proper names
-- Lists comma-separated, no extra punctuation
 """
 # --- Build the Smart Agent ---
 smart_agent = CodeAgent(
     tools=[search_tool, summarize_query],
-    model=HfApiModel(),             # no prompt here
-    system_prompt=system_prompt     # prompt passed to CodeAgent
 )
-# --- Hook into Gradio App ---
 class BasicAgent:
     def __init__(self):
-        print("SmolAgent (ReACT + Scratchpad + Retry) initialized.")
     def __call__(self, question: str) -> str:
-        print(f"Q: {question[:50]}...")
-        return smart_agent.run(question)
 def run_and_submit_all(profile: gr.OAuthProfile | None):
-    space_id = os.getenv("SPACE_ID")
     if not profile:
-        return "Please log in with Hugging Face.", None
     username = profile.username
     agent = BasicAgent()
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
@@ -87,50 +91,57 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     except Exception as e:
         return f"Error fetching questions: {e}", None
-    # 2. Run agent
-    logs, payload = [], []
     for item in questions:
         tid = item.get("task_id")
         q = item.get("question")
         if not tid or q is None:
             continue
-        try:
-            ans = agent(q)
-        except Exception as e:
-            ans = f"AGENT ERROR: {e}"
         logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
         payload.append({"task_id": tid, "submitted_answer": ans})
     if not payload:
         return "Agent did not produce any answers.", pd.DataFrame(logs)
-    # 3. Submit
-    sub = {"username": username, "agent_code": agent_code, "answers": payload}
     try:
-        post = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=60)
         post.raise_for_status()
         res = post.json()
         status = (
             f"Submission Successful!\n"
             f"User: {res.get('username')}\n"
-            f"Score: {res.get('score', 'N/A')}% "
-            f"({res.get('correct_count', '?')}/"
-            f"{res.get('total_attempted', '?')})"
         )
         return status, pd.DataFrame(logs)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(logs)
-# --- Gradio UI ---
 with gr.Blocks() as demo:
     gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
-    gr.Markdown("1) Clone this space  2) Log in  3) Run Evaluation & Submit All Answers")
     gr.LoginButton()
-    btn = gr.Button("Run Evaluation & Submit All Answers")
-    out_status = gr.Textbox(label="Run Status", lines=5, interactive=False)
-    out_table  = gr.DataFrame(label="Results")
-    btn.click(fn=run_and_submit_all, outputs=[out_status, out_table])
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)

 search_tool = DuckDuckGoSearchTool()
+# --- ReACT + Scratchpad + Auto-Retry Instructions ---
+instruction_prompt = """
 You are a ReACT agent with scratchpad memory and a retry mechanism.
+For every question:
 1. Thought: Figure out what's needed.
 2. Action: (Optional) Call a tool with a precise query.
 3. Observation: Record tool output.
+If the first Observation is empty or irrelevant:
+4. Thought: Unclear result; reframe and retry.
+5. Action: summarize_query(original question).
 6. Action: DuckDuckGoSearchTool(reframed query).
 7. Observation: Record new result.
 Then:
+8. Thought: Reflect on all observations.
 9. FINAL ANSWER: Provide your answer.
+Formatting rules:
+- Begin with `FINAL ANSWER: [your answer]`
+- Numbers: plain (no commas unless in a list)
+- Strings: no articles unless part of proper names
+- Lists: comma-separated without extra punctuation
 """
 # --- Build the Smart Agent ---
 smart_agent = CodeAgent(
     tools=[search_tool, summarize_query],
+    model=HfApiModel()
 )
+# --- Integrate into Gradio App ---
 class BasicAgent:
     def __init__(self):
+        print("SmolAgent with ReACT, Scratchpad & Retry initialized.")
     def __call__(self, question: str) -> str:
+        full_input = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
+        print(f"Agent input (first 100 chars): {full_input[:100]}...")
+        try:
+            return smart_agent.run(full_input)
+        except Exception as e:
+            return f"AGENT ERROR: {e}"
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     if not profile:
+        return "Please log in to Hugging Face using the login button above.", None
     username = profile.username
+    space_id = os.getenv("SPACE_ID", "")
     agent = BasicAgent()
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     except Exception as e:
         return f"Error fetching questions: {e}", None
+    # 2. Run agent on each question
+    logs = []
+    payload = []
     for item in questions:
         tid = item.get("task_id")
         q = item.get("question")
         if not tid or q is None:
             continue
+        ans = agent(q)
         logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
         payload.append({"task_id": tid, "submitted_answer": ans})
     if not payload:
         return "Agent did not produce any answers.", pd.DataFrame(logs)
+    # 3. Submit answers
+    submission = {"username": username, "agent_code": agent_code, "answers": payload}
     try:
+        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
         post.raise_for_status()
         res = post.json()
         status = (
             f"Submission Successful!\n"
             f"User: {res.get('username')}\n"
+            f"Overall Score: {res.get('score', 'N/A')}% "
+            f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
+            f"Message: {res.get('message', '')}"
         )
         return status, pd.DataFrame(logs)
     except Exception as e:
         return f"Submission Failed: {e}", pd.DataFrame(logs)
+# --- Gradio Interface ---
 with gr.Blocks() as demo:
     gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
+    gr.Markdown(
+        """
+        **Instructions:**
+        1. Clone this space and modify if needed.
+        2. Log in to Hugging Face.
+        3. Click **Run Evaluation & Submit All Answers** to evaluate your agent.
+        **Note:** Evaluation may take several minutes.
+        """
+    )
     gr.LoginButton()
+    run_btn = gr.Button("Run Evaluation & Submit All Answers")
+    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
+    table_out = gr.DataFrame(label="Questions & Answers", wrap=True)
+    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
 if __name__ == "__main__":
     demo.launch(debug=True, share=False)