|
import os |
|
import gradio as gr |
|
import requests |
|
import pandas as pd |
|
|
|
from tools import AnswerTool |
|
from smolagents import CodeAgent, OpenAIServerModel |
|
from smolagents import DuckDuckGoSearchTool, WikipediaSearchTool |
|
|
|
|
|
# Root URL of the scoring service. The request helpers in this file append the
# endpoint paths "/questions", "/random-question" and "/submit" to it.
DEFAULT_API_URL: str = "https://agents-course-unit4-scoring.hf.space"
|
|
|
class BasicAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` driven by ``gpt-4o``.

    The wrapped agent receives an ``AnswerTool``, a DuckDuckGo search tool and
    a Wikipedia search tool, has the smolagents base tools enabled, is limited
    to two steps per run and runs with verbosity level 0.
    """

    def __init__(self):
        """Create the model, the tool instances and the underlying agent."""
        model = OpenAIServerModel(model_id="gpt-4o")
        tools = [AnswerTool(), DuckDuckGoSearchTool(), WikipediaSearchTool()]
        self.agent = CodeAgent(
            model=model,
            tools=tools,
            add_base_tools=True,
            max_steps=2,
            verbosity_level=0,
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer as a string.

        Args:
            question: The task text handed to the agent unchanged.

        Returns:
            The agent's final answer converted with ``str()``.
        """
        # ``run`` hands back the agent's final-answer object, which is not
        # necessarily a ``str`` (e.g. a number); convert it so the annotated
        # return type actually holds for callers.
        return str(self.agent.run(question))
|
|
|
def run_and_submit_all(username):
    """Fetch all questions, answer each with a ``BasicAgent`` and submit them.

    Args:
        username: Hugging Face username the submission is filed under.
            Surrounding whitespace is ignored; an empty, whitespace-only or
            ``None`` value is rejected.

    Returns:
        A ``(status, table)`` tuple. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with the columns "Task ID",
        "Question" and "Answer" (one row per attempted question), or ``None``
        when the run stopped before any question was attempted.
    """
    # A whitespace-only username is as unusable as an empty one.
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", None

    # --- 1. Download the question list -----------------------------------
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", None
        resp.raise_for_status()
        questions = resp.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # Anything other than a list cannot be iterated as question records.
    if not isinstance(questions, list):
        return (
            "Error fetching questions: unexpected response format "
            f"({type(questions).__name__})",
            None,
        )

    # --- 2. Build the agent ------------------------------------------------
    # Construction can fail (e.g. model/tool setup); report it as a status
    # message instead of letting the exception escape the UI handler.
    try:
        agent = BasicAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    # --- 3. Answer every well-formed question --------------------------------
    results = []
    payload = []
    for q in questions:
        if not isinstance(q, dict):
            continue
        tid = q.get("task_id")
        text = q.get("question")
        if not (tid and text):
            continue
        try:
            ans = agent(text)
        except Exception as e:
            # Keep going: a failed question is recorded as an error answer.
            ans = f"ERROR: {e}"
        results.append({"Task ID": tid, "Question": text, "Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})

    if not payload:
        return "Agent returned no answers.", pd.DataFrame(results)

    # --- 4. Submit -----------------------------------------------------------
    # NOTE(review): if SPACE_ID is unset the URL contains the text "None";
    # confirm whether the scoring service tolerates that.
    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": payload,
    }
    try:
        sub_resp = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        sub_resp.raise_for_status()
        data = sub_resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {data.get('username')}\n"
            f"Score: {data.get('score')}% ({data.get('correct_count')}/{data.get('total_attempted')})\n"
            f"Message: {data.get('message')}"
        )
    except Exception as e:
        status = f"Submission Failed: {e}"

    return status, pd.DataFrame(results)
|
|
|
def test_random_question(username):
    """Fetch one random question and answer it with a fresh ``BasicAgent``.

    Args:
        username: Hugging Face username; only checked for being non-empty
            (surrounding whitespace is ignored).

    Returns:
        A ``(question, answer)`` tuple. When something goes wrong the first
        element carries the error message and the second is ``""``.
    """
    if isinstance(username, str):
        username = username.strip()
    if not username:
        return "Please enter your Hugging Face username.", ""

    try:
        resp = requests.get(f"{DEFAULT_API_URL}/random-question", timeout=15)
        if resp.status_code == 429:
            return "Server rate limited the requests. Please wait a moment and try again.", ""
        # Without this an HTTP error body would be parsed as if it were a
        # question record.
        resp.raise_for_status()
        q = resp.json()
        question = q.get("question", "")
        if not question:
            # Do not spend an agent run on an empty prompt.
            return "Error during test: the server returned no question.", ""
        ans = BasicAgent()(question)
        return question, ans
    except Exception as e:
        return f"Error during test: {e}", ""
|
|
|
|
|
# Gradio front end: a single username field feeds both action buttons.
with gr.Blocks() as demo:
    # Header and usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Enter your Hugging Face username.
        2. Use **Test Random Question** to check a single question.
        3. Use **Run Evaluation & Submit All Answers** to evaluate on all questions.
        """
    )

    # Inputs and triggers.
    username_input = gr.Textbox(
        label="Hugging Face Username",
        placeholder="your-username",
    )
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    test_btn = gr.Button("Test Random Question")

    # Read-only outputs: the first two belong to the full run, the last two
    # to the single-question test.
    status_out = gr.Textbox(
        label="Status / Result",
        lines=5,
        interactive=False,
    )
    table_out = gr.DataFrame(
        label="Full Results Table",
        wrap=True,
    )
    question_out = gr.Textbox(
        label="Random Question",
        lines=3,
        interactive=False,
    )
    answer_out = gr.Textbox(
        label="Agent Answer",
        lines=3,
        interactive=False,
    )

    # Wire each button to its handler and output components.
    run_btn.click(
        fn=run_and_submit_all,
        inputs=[username_input],
        outputs=[status_out, table_out],
    )
    test_btn.click(
        fn=test_random_question,
        inputs=[username_input],
        outputs=[question_out, answer_out],
    )


# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)
|
|