|
import os |
|
import openai |
|
import gradio as gr |
|
import requests |
|
import pandas as pd |
|
|
|
|
|
# --- Runtime configuration -------------------------------------------------
# The OpenAI key is read from the environment; it is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Base URL of the scoring service; "/questions" and "/submit" are appended
# to it by run_and_submit_all.
API_URL = "https://agents-course-unit4-scoring.hf.space"

# Model identifier sent with every chat-completion request.
MODEL_NAME = "gpt-4.1"
|
|
|
|
|
def ask_chatgpt_4_1(question: str) -> str:
    """Send *question* to the configured OpenAI chat model and return its reply.

    The request is a two-message conversation (a fixed system prompt followed
    by the user's question) sampled with ``temperature=0.7`` and limited to
    1500 completion tokens.

    Both generations of the ``openai`` package are supported: the 1.x SDK via
    the module-level ``openai.chat.completions`` client, and the legacy 0.x
    ``openai.ChatCompletion`` interface, which 1.x removed.

    Args:
        question: The user's prompt text.

    Returns:
        The text of the first completion choice, or an empty string when the
        model returned no text content.

    Raises:
        Exception: Any error raised by the ``openai`` client propagates to
            the caller unchanged.
    """
    request = {
        "model": MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question},
        ],
        "temperature": 0.7,
        "max_tokens": 1500,
    }

    # `openai.OpenAI` exists only in SDK >= 1.0. There the old
    # `openai.ChatCompletion.create` entry point raises instead of sending
    # the request, so the 1.x module-level client must be used.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
    else:
        response = openai.ChatCompletion.create(**request)

    # `content` can be None (e.g. a reply without text); keep the `str`
    # contract promised by the signature.
    return response.choices[0].message.content or ""
|
|
|
|
|
class BasicAgent:
    """Callable agent that answers a question via ``ask_chatgpt_4_1``."""

    def __init__(self):
        # Announce readiness on stdout; the agent holds no state.
        print("BasicAgent using OpenAI GPT-4.1 ready.")

    def __call__(self, question: str) -> str:
        """Return the model's answer to *question*.

        The question is echoed to stdout first. Any exception raised while
        asking the model is not propagated; it is rendered into the returned
        string as ``"AGENT ERROR: <message>"`` instead.
        """
        print(f"Q>> {question}")
        try:
            answer = ask_chatgpt_4_1(question)
        except Exception as err:
            # Best-effort boundary: one failing question must not abort a run.
            answer = f"AGENT ERROR: {err}"
        return answer
|
|
|
|
|
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with ``BasicAgent``, and submit them.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or a falsy
            value when nobody is logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message.
        ``table`` is a ``pandas.DataFrame`` with the columns ``Task ID``,
        ``Question`` and ``Answer``, or ``None`` when the run stopped before
        any question was processed (no login, fetch error, empty question
        list).
    """
    if not profile:
        return "Please log in to Hugging Face.", None

    user = profile.username
    space = os.getenv("SPACE_ID", "")
    solver = BasicAgent()
    repo_url = f"https://huggingface.co/spaces/{space}/tree/main"

    # Step 1: download the question list.
    try:
        fetched = requests.get(f"{API_URL}/questions", timeout=15)
        fetched.raise_for_status()
        questions = fetched.json()
    except Exception as err:
        return f"Error fetching questions: {err}", None
    if not questions:
        return "No questions fetched.", None

    # Step 2: answer every well-formed question; entries without a task id
    # or without a question are skipped.
    rows = []
    for entry in questions:
        task_id = entry.get("task_id")
        text = entry.get("question")
        if task_id and text is not None:
            rows.append(
                {"Task ID": task_id, "Question": text, "Answer": solver(text)}
            )

    # The submission payload carries the same answers as the display log.
    answers = [
        {"task_id": row["Task ID"], "submitted_answer": row["Answer"]}
        for row in rows
    ]
    table = pd.DataFrame(rows)
    if not answers:
        return "No answers generated.", table

    # Step 3: submit the answers and report the score returned by the service.
    try:
        posted = requests.post(
            f"{API_URL}/submit",
            json={"username": user, "agent_code": repo_url, "answers": answers},
            timeout=60,
        )
        posted.raise_for_status()
        result = posted.json()
        scorer = result.get("username")
        score = result.get("score", "N/A")
        correct = result.get("correct_count")
        attempted = result.get("total_attempted")
        return f"Success! {scorer} scored {score}% ({correct}/{attempted})", table
    except Exception as err:
        return f"Submission Failed: {err}", table
|
|
|
|
|
# Gradio front end: a login button, a single trigger button, and two outputs
# (status text and the question/answer table) filled by run_and_submit_all.
with gr.Blocks() as demo:
    gr.Markdown(value="# SmolAgent GAIA Evaluation Runner π")
    gr.Markdown(
        value="""
        1. Clone space and modify if needed
        2. Log in to Hugging Face
        3. Click **Run Evaluation & Submit All Answers**
        (May take several minutes)
        """
    )

    gr.LoginButton()
    run_btn = gr.Button(value="Run Evaluation & Submit All Answers")

    # Read-only outputs, listed in the order the callback returns them.
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Q&A Log", wrap=True)

    # No explicit inputs: the callback's only parameter is the OAuth profile.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, table_out],
    )


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)
|
|