darrenphodgson76 committed on
Commit
63d6133
·
verified ·
1 Parent(s): 2ae2c93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -37
app.py CHANGED
@@ -25,55 +25,59 @@ def summarize_query(query: str) -> str:
25
 
26
  search_tool = DuckDuckGoSearchTool()
27
 
28
- # --- ReACT + Scratchpad + Retry Prompt ---
29
 
30
- system_prompt = """
31
  You are a ReACT agent with scratchpad memory and a retry mechanism.
32
 
 
33
  1. Thought: Figure out what's needed.
34
  2. Action: (Optional) Call a tool with a precise query.
35
  3. Observation: Record tool output.
36
 
37
- If the first Observation is empty/irrelevant:
38
- 4. Thought: Unclear result, reframe and retry.
39
- 5. Action: summarize_query(original query).
40
  6. Action: DuckDuckGoSearchTool(reframed query).
41
  7. Observation: Record new result.
42
 
43
  Then:
44
- 8. Thought: Reflect using all observations.
45
  9. FINAL ANSWER: Provide your answer.
46
 
47
- Formatting:
48
- - Start with FINAL ANSWER: [your answer]
49
- - Numbers plain (no commas unless list)
50
- - Strings no articles unless part of proper names
51
- - Lists comma-separated, no extra punctuation
52
  """
53
 
54
  # --- Build the Smart Agent ---
55
 
56
  smart_agent = CodeAgent(
57
  tools=[search_tool, summarize_query],
58
- model=HfApiModel(), # no prompt here
59
- system_prompt=system_prompt # prompt passed to CodeAgent
60
  )
61
 
62
- # --- Hook into Gradio App ---
63
 
64
  class BasicAgent:
65
  def __init__(self):
66
- print("SmolAgent (ReACT + Scratchpad + Retry) initialized.")
67
 
68
  def __call__(self, question: str) -> str:
69
- print(f"Q: {question[:50]}...")
70
- return smart_agent.run(question)
 
 
 
 
71
 
72
  def run_and_submit_all(profile: gr.OAuthProfile | None):
73
- space_id = os.getenv("SPACE_ID")
74
  if not profile:
75
- return "Please log in with Hugging Face.", None
76
  username = profile.username
 
77
  agent = BasicAgent()
78
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
79
 
@@ -87,50 +91,57 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
87
  except Exception as e:
88
  return f"Error fetching questions: {e}", None
89
 
90
- # 2. Run agent
91
- logs, payload = [], []
 
92
  for item in questions:
93
  tid = item.get("task_id")
94
  q = item.get("question")
95
  if not tid or q is None:
96
  continue
97
- try:
98
- ans = agent(q)
99
- except Exception as e:
100
- ans = f"AGENT ERROR: {e}"
101
  logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
102
  payload.append({"task_id": tid, "submitted_answer": ans})
103
 
104
  if not payload:
105
  return "Agent did not produce any answers.", pd.DataFrame(logs)
106
 
107
- # 3. Submit
108
- sub = {"username": username, "agent_code": agent_code, "answers": payload}
109
  try:
110
- post = requests.post(f"{DEFAULT_API_URL}/submit", json=sub, timeout=60)
111
  post.raise_for_status()
112
  res = post.json()
113
  status = (
114
  f"Submission Successful!\n"
115
  f"User: {res.get('username')}\n"
116
- f"Score: {res.get('score', 'N/A')}% "
117
- f"({res.get('correct_count', '?')}/"
118
- f"{res.get('total_attempted', '?')})"
119
  )
120
  return status, pd.DataFrame(logs)
121
  except Exception as e:
122
  return f"Submission Failed: {e}", pd.DataFrame(logs)
123
 
124
- # --- Gradio UI ---
125
 
126
  with gr.Blocks() as demo:
127
  gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
128
- gr.Markdown("1) Clone this space 2) Log in 3) Run Evaluation & Submit All Answers")
 
 
 
 
 
 
 
 
129
  gr.LoginButton()
130
- btn = gr.Button("Run Evaluation & Submit All Answers")
131
- out_status = gr.Textbox(label="Run Status", lines=5, interactive=False)
132
- out_table = gr.DataFrame(label="Results")
133
- btn.click(fn=run_and_submit_all, outputs=[out_status, out_table])
 
134
 
135
  if __name__ == "__main__":
136
  demo.launch(debug=True, share=False)
 
25
 
26
  search_tool = DuckDuckGoSearchTool()
27
 
28
+ # --- ReACT + Scratchpad + Auto-Retry Instructions ---
29
 
30
+ instruction_prompt = """
31
  You are a ReACT agent with scratchpad memory and a retry mechanism.
32
 
33
+ For every question:
34
  1. Thought: Figure out what's needed.
35
  2. Action: (Optional) Call a tool with a precise query.
36
  3. Observation: Record tool output.
37
 
38
+ If the first Observation is empty or irrelevant:
39
+ 4. Thought: Unclear result; reframe and retry.
40
+ 5. Action: summarize_query(original question).
41
  6. Action: DuckDuckGoSearchTool(reframed query).
42
  7. Observation: Record new result.
43
 
44
  Then:
45
+ 8. Thought: Reflect on all observations.
46
  9. FINAL ANSWER: Provide your answer.
47
 
48
+ Formatting rules:
49
+ - Begin with `FINAL ANSWER: [your answer]`
50
+ - Numbers: plain (no commas unless in a list)
51
+ - Strings: no articles unless part of proper names
52
+ - Lists: comma-separated without extra punctuation
53
  """
54
 
55
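  # --- Build the Smart Agent (the instruction prompt is prepended per question in BasicAgent.__call__, not passed to the constructor) ---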
56
 
57
  smart_agent = CodeAgent(
58
  tools=[search_tool, summarize_query],
59
+ model=HfApiModel()
 
60
  )
61
 
62
+ # --- Integrate into Gradio App ---
63
 
64
  class BasicAgent:
65
  def __init__(self):
66
+ print("SmolAgent with ReACT, Scratchpad & Retry initialized.")
67
 
68
  def __call__(self, question: str) -> str:
69
+ full_input = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
70
+ print(f"Agent input (first 100 chars): {full_input[:100]}...")
71
+ try:
72
+ return smart_agent.run(full_input)
73
+ except Exception as e:
74
+ return f"AGENT ERROR: {e}"
75
 
76
  def run_and_submit_all(profile: gr.OAuthProfile | None):
 
77
  if not profile:
78
+ return "Please log in to Hugging Face using the login button above.", None
79
  username = profile.username
80
+ space_id = os.getenv("SPACE_ID", "")
81
  agent = BasicAgent()
82
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
83
 
 
91
  except Exception as e:
92
  return f"Error fetching questions: {e}", None
93
 
94
+ # 2. Run agent on each question
95
+ logs = []
96
+ payload = []
97
  for item in questions:
98
  tid = item.get("task_id")
99
  q = item.get("question")
100
  if not tid or q is None:
101
  continue
102
+ ans = agent(q)
 
 
 
103
  logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
104
  payload.append({"task_id": tid, "submitted_answer": ans})
105
 
106
  if not payload:
107
  return "Agent did not produce any answers.", pd.DataFrame(logs)
108
 
109
+ # 3. Submit answers
110
+ submission = {"username": username, "agent_code": agent_code, "answers": payload}
111
  try:
112
+ post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
113
  post.raise_for_status()
114
  res = post.json()
115
  status = (
116
  f"Submission Successful!\n"
117
  f"User: {res.get('username')}\n"
118
+ f"Overall Score: {res.get('score', 'N/A')}% "
119
+ f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
120
+ f"Message: {res.get('message', '')}"
121
  )
122
  return status, pd.DataFrame(logs)
123
  except Exception as e:
124
  return f"Submission Failed: {e}", pd.DataFrame(logs)
125
 
126
+ # --- Gradio Interface ---
127
 
128
  with gr.Blocks() as demo:
129
  gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
130
+ gr.Markdown(
131
+ """
132
+ **Instructions:**
133
+ 1. Clone this space and modify if needed.
134
+ 2. Log in to Hugging Face.
135
+ 3. Click **Run Evaluation & Submit All Answers** to evaluate your agent.
136
+ **Note:** Evaluation may take several minutes.
137
+ """
138
+ )
139
  gr.LoginButton()
140
+ run_btn = gr.Button("Run Evaluation & Submit All Answers")
141
+ status_out = gr.Textbox(label="Status", lines=5, interactive=False)
142
+ table_out = gr.DataFrame(label="Questions & Answers", wrap=True)
143
+
144
+ run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
145
 
146
  if __name__ == "__main__":
147
  demo.launch(debug=True, share=False)