Update app.py
Browse files
app.py
CHANGED
@@ -25,55 +25,59 @@ def summarize_query(query: str) -> str:
|
|
25 |
|
26 |
search_tool = DuckDuckGoSearchTool()
|
27 |
|
28 |
-
# --- ReACT + Scratchpad + Retry
|
29 |
|
30 |
-
|
31 |
You are a ReACT agent with scratchpad memory and a retry mechanism.
|
32 |
|
|
|
33 |
1. Thought: Figure out what's needed.
|
34 |
2. Action: (Optional) Call a tool with a precise query.
|
35 |
3. Observation: Record tool output.
|
36 |
|
37 |
-
If the first Observation is empty
|
38 |
-
4. Thought: Unclear result
|
39 |
-
5. Action: summarize_query(original
|
40 |
6. Action: DuckDuckGoSearchTool(reframed query).
|
41 |
7. Observation: Record new result.
|
42 |
|
43 |
Then:
|
44 |
-
8. Thought: Reflect
|
45 |
9. FINAL ANSWER: Provide your answer.
|
46 |
|
47 |
-
Formatting:
|
48 |
-
-
|
49 |
-
- Numbers plain (no commas unless list)
|
50 |
-
- Strings no articles unless part of proper names
|
51 |
-
- Lists comma-separated
|
52 |
"""
|
53 |
|
54 |
# --- Build the Smart Agent ---
|
55 |
|
56 |
smart_agent = CodeAgent(
|
57 |
tools=[search_tool, summarize_query],
|
58 |
-
model=HfApiModel()
|
59 |
-
system_prompt=system_prompt # prompt passed to CodeAgent
|
60 |
)
|
61 |
|
62 |
-
# ---
|
63 |
|
64 |
class BasicAgent:
|
65 |
def __init__(self):
|
66 |
-
print("SmolAgent
|
67 |
|
68 |
def __call__(self, question: str) -> str:
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
|
72 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
73 |
-
space_id = os.getenv("SPACE_ID")
|
74 |
if not profile:
|
75 |
-
return "Please log in
|
76 |
username = profile.username
|
|
|
77 |
agent = BasicAgent()
|
78 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
79 |
|
@@ -87,50 +91,57 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
87 |
except Exception as e:
|
88 |
return f"Error fetching questions: {e}", None
|
89 |
|
90 |
-
# 2. Run agent
|
91 |
-
logs
|
|
|
92 |
for item in questions:
|
93 |
tid = item.get("task_id")
|
94 |
q = item.get("question")
|
95 |
if not tid or q is None:
|
96 |
continue
|
97 |
-
|
98 |
-
ans = agent(q)
|
99 |
-
except Exception as e:
|
100 |
-
ans = f"AGENT ERROR: {e}"
|
101 |
logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
|
102 |
payload.append({"task_id": tid, "submitted_answer": ans})
|
103 |
|
104 |
if not payload:
|
105 |
return "Agent did not produce any answers.", pd.DataFrame(logs)
|
106 |
|
107 |
-
# 3. Submit
|
108 |
-
|
109 |
try:
|
110 |
-
post = requests.post(f"{DEFAULT_API_URL}/submit", json=
|
111 |
post.raise_for_status()
|
112 |
res = post.json()
|
113 |
status = (
|
114 |
f"Submission Successful!\n"
|
115 |
f"User: {res.get('username')}\n"
|
116 |
-
f"Score: {res.get('score', 'N/A')}% "
|
117 |
-
f"({res.get('correct_count', '?')}/"
|
118 |
-
f"{res.get('
|
119 |
)
|
120 |
return status, pd.DataFrame(logs)
|
121 |
except Exception as e:
|
122 |
return f"Submission Failed: {e}", pd.DataFrame(logs)
|
123 |
|
124 |
-
# --- Gradio
|
125 |
|
126 |
with gr.Blocks() as demo:
|
127 |
gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
|
128 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
gr.LoginButton()
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
134 |
|
135 |
if __name__ == "__main__":
|
136 |
demo.launch(debug=True, share=False)
|
|
|
25 |
|
26 |
# Web-search tool instance; passed to the CodeAgent constructed further down.
search_tool = DuckDuckGoSearchTool()

# --- ReACT + Scratchpad + Auto-Retry Instructions ---

# Instruction text that BasicAgent.__call__ strips and prepends to every
# question before handing the combined text to the agent.
# NOTE(review): step 6 refers to the search tool by its Python class name
# (DuckDuckGoSearchTool); presumably the agent exposes the tool under its own
# registered tool name -- confirm and align the wording if they differ.
instruction_prompt = """
You are a ReACT agent with scratchpad memory and a retry mechanism.

For every question:
1. Thought: Figure out what's needed.
2. Action: (Optional) Call a tool with a precise query.
3. Observation: Record tool output.

If the first Observation is empty or irrelevant:
4. Thought: Unclear result; reframe and retry.
5. Action: summarize_query(original question).
6. Action: DuckDuckGoSearchTool(reframed query).
7. Observation: Record new result.

Then:
8. Thought: Reflect on all observations.
9. FINAL ANSWER: Provide your answer.

Formatting rules:
- Begin with `FINAL ANSWER: [your answer]`
- Numbers: plain (no commas unless in a list)
- Strings: no articles unless part of proper names
- Lists: comma-separated without extra punctuation
"""
|
54 |
|
55 |
# --- Build the Smart Agent ---

# Single module-level CodeAgent reused by every BasicAgent call. It receives
# the web-search tool and the summarize_query helper, and a HfApiModel built
# with default arguments. The instruction prompt is not passed here; it is
# prepended to each question at call time instead.
smart_agent = CodeAgent(
    tools=[search_tool, summarize_query],
    model=HfApiModel()
)
|
61 |
|
62 |
+
# --- Integrate into Gradio App ---
|
63 |
|
64 |
class BasicAgent:
    """Callable wrapper that runs the module-level ``smart_agent`` on a question.

    Instances hold no state of their own; each call builds the full prompt
    from ``instruction_prompt`` plus the question and delegates to
    ``smart_agent.run``.
    """

    def __init__(self):
        print("SmolAgent with ReACT, Scratchpad & Retry initialized.")

    def __call__(self, question: str) -> str:
        """Answer a single question.

        Args:
            question: The raw question text; surrounding whitespace is stripped.

        Returns:
            The agent's answer as a string. If the agent raises, the string
            ``"AGENT ERROR: <exception>"`` is returned instead so one failing
            question does not abort the whole evaluation run.
        """
        full_input = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
        print(f"Agent input (first 100 chars): {full_input[:100]}...")
        try:
            # run() is not guaranteed to return a str (the final answer can be
            # any Python object), so coerce it to honour the ``-> str``
            # contract relied on by the submission payload and the log table.
            return str(smart_agent.run(full_input))
        except Exception as e:
            # Deliberate best-effort boundary: report the failure as the answer.
            return f"AGENT ERROR: {e}"
|
75 |
|
76 |
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
|
|
77 |
if not profile:
|
78 |
+
return "Please log in to Hugging Face using the login button above.", None
|
79 |
username = profile.username
|
80 |
+
space_id = os.getenv("SPACE_ID", "")
|
81 |
agent = BasicAgent()
|
82 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
83 |
|
|
|
91 |
except Exception as e:
|
92 |
return f"Error fetching questions: {e}", None
|
93 |
|
94 |
+
# 2. Run agent on each question
|
95 |
+
logs = []
|
96 |
+
payload = []
|
97 |
for item in questions:
|
98 |
tid = item.get("task_id")
|
99 |
q = item.get("question")
|
100 |
if not tid or q is None:
|
101 |
continue
|
102 |
+
ans = agent(q)
|
|
|
|
|
|
|
103 |
logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
|
104 |
payload.append({"task_id": tid, "submitted_answer": ans})
|
105 |
|
106 |
if not payload:
|
107 |
return "Agent did not produce any answers.", pd.DataFrame(logs)
|
108 |
|
109 |
+
# 3. Submit answers
|
110 |
+
submission = {"username": username, "agent_code": agent_code, "answers": payload}
|
111 |
try:
|
112 |
+
post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
|
113 |
post.raise_for_status()
|
114 |
res = post.json()
|
115 |
status = (
|
116 |
f"Submission Successful!\n"
|
117 |
f"User: {res.get('username')}\n"
|
118 |
+
f"Overall Score: {res.get('score', 'N/A')}% "
|
119 |
+
f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
|
120 |
+
f"Message: {res.get('message', '')}"
|
121 |
)
|
122 |
return status, pd.DataFrame(logs)
|
123 |
except Exception as e:
|
124 |
return f"Submission Failed: {e}", pd.DataFrame(logs)
|
125 |
|
126 |
+
# --- Gradio Interface ---
|
127 |
|
128 |
# Declarative UI: a title, usage instructions, a login button, one action
# button, and two output widgets (status text and a results table).
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Evaluation Runner 🚀")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space and modify if needed.
        2. Log in to Hugging Face.
        3. Click **Run Evaluation & Submit All Answers** to evaluate your agent.
        **Note:** Evaluation may take several minutes.
        """
    )
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Questions & Answers", wrap=True)

    # run_and_submit_all returns a (status message, table-or-None) pair, which
    # maps positionally onto the two outputs. No explicit inputs are wired;
    # presumably Gradio supplies the `profile` argument from the login state
    # based on its OAuthProfile annotation -- confirm against the Gradio docs.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
|
145 |
|
146 |
# Start the Gradio app only when this file is executed directly as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)
|