import logging
import os

import gradio as gr
import pandas as pd
import requests
from openai import OpenAI
from smolagents import CodeAgent, DuckDuckGoSearchTool, tool

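# Basic logging so per-question progress and errors are visible in the runtime logs.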
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)

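# Scoring API for the Hugging Face Agents Course (Unit 4) and a hard cap on prompt size.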
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_PROMPT_LENGTH = 15000

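# Fail fast if the OpenAI key is missing; the model can be overridden via OPENAI_MODEL_ID.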
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Environment variable OPENAI_API_KEY is required.")

client = OpenAI(api_key=openai_api_key)
OPENAI_MODEL_ID = os.getenv("OPENAI_MODEL_ID", "gpt-4.1")

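# Thin adapter exposing the OpenAI Responses API as a plain callable (prompt in, text out)
# so smolagents can use it as a model. Inference errors are returned as strings instead of
# raised, so one failed call does not abort an entire evaluation run.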
class OpenAIModelWrapper:
    def __init__(self, model_id: str, client: OpenAI):
        self.model_id = model_id
        self.client = client

    def __call__(self, prompt: str, **kwargs) -> str:
        try:
            resp = self.client.responses.create(
                model=self.model_id,
                input=prompt,
            )
            return getattr(resp, "output_text", str(resp))
        except Exception as e:
            logger.exception("OpenAI inference error")
            return f"AGENT ERROR (inference): {e}"

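# Tools exposed to the agent. @tool registers each function as a smolagents tool,
# using its docstring as the tool description shown to the model.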
@tool
def summarize_query(query: str) -> str:
    """
    Reframes an unclear search query to improve relevance.

    Args:
        query (str): The original search query needing refinement.

    Returns:
        str: A concise, improved version of the query.
    """
    return f"Summarize and reframe: {query}"

@tool
def wikipedia_search(page: str) -> str:
    """
    Fetches the summary extract of an English Wikipedia page via the REST API.

    Args:
        page (str): Title of the Wikipedia page (e.g. 'Mercedes_Sosa_discography').

    Returns:
        str: The page's summary (or an error message).
    """
    try:
        url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        extract = resp.json().get("extract", "")
        if not extract:
            logger.warning("Wikipedia returned empty extract for %s", page)
        return extract
    except Exception as e:
        logger.exception("Wikipedia lookup failed")
        return f"Wikipedia error: {e}"

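# Tool instances handed to the CodeAgent below.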
search_tool = DuckDuckGoSearchTool()
wiki_tool = wikipedia_search
summarize_tool = summarize_query

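# ReACT-style instructions and strict FINAL ANSWER formatting rules prepended to every question.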
instruction_prompt = """
You are a ReACT agent with three tools:
• DuckDuckGoSearchTool(query: str)
• wikipedia_search(page: str)
• summarize_query(query: str)

Internally, for each question:
1. Thought: decide which tool to call.
2. Action: call the chosen tool.
3. Observation: record the result.
4. If the observation is empty or irrelevant:
   Thought: retry with summarize_query + DuckDuckGoSearchTool.
   Record the new Observation.
5. Thought: integrate the observations.

Finally, output your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of
numbers and/or strings. If you are asked for a number, do not use commas or units such as $ or
percent signs unless specified otherwise. If you are asked for a string, do not use articles or
abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise. If you
are asked for a comma-separated list, apply the above rules to each element depending on whether
it is a number or a string.
"""

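# Wire the OpenAI-backed model wrapper and the tools into a smolagents CodeAgent.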
llm_wrapper = OpenAIModelWrapper(model_id=OPENAI_MODEL_ID, client=client)
smart_agent = CodeAgent(
    tools=[search_tool, wiki_tool, summarize_tool],
    model=llm_wrapper,
)

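# Thin front-end around the CodeAgent: validates the question, prepends the
# instruction prompt, truncates oversized prompts, and never raises.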
class BasicAgent:
    def __init__(self):
        logger.info("Initialized SmolAgent (%s) with ReACT & tools.", OPENAI_MODEL_ID)

    def __call__(self, question: str) -> str:
        if not question or not question.strip():
            return "AGENT ERROR: Empty question."

        prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
        if len(prompt) > MAX_PROMPT_LENGTH:
            prompt = prompt[:MAX_PROMPT_LENGTH]
            logger.warning("Prompt truncated to %d chars", MAX_PROMPT_LENGTH)
        try:
            return smart_agent.run(prompt)
        except Exception as e:
            logger.exception("Agent run error")
            return f"AGENT ERROR (run): {e}"

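# Fetch all evaluation questions, answer each with the agent, and submit the answers
# to the scoring API. Returns a status message and a results table for the UI.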
def run_and_submit_all(profile: gr.OAuthProfile | None):
    if not profile:
        return "Please log in to Hugging Face.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    agent = BasicAgent()

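    # Fetch the question set from the scoring service.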
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
        if not isinstance(questions, list):
            raise ValueError("Invalid questions format")
        logger.info("Fetched %d questions", len(questions))
    except Exception as e:
        logger.exception("Failed to fetch questions")
        return f"Error fetching questions: {e}", None

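    # Answer each question, skipping items that lack a task_id or question text.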
    logs, payload, skipped = [], [], 0
    for item in questions:
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or not q:
            skipped += 1
            continue
        ans = agent(q)
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
    if skipped:
        logger.warning("Skipped %d malformed items", skipped)

    if not payload:
        return "Agent did not produce any answers.", pd.DataFrame(logs)

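    # Submit all answers in one request and report the returned score.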
    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        post.raise_for_status()
        res = post.json()
        status = (
            f"Submission Successful!\n"
            f"User: {res.get('username')}\n"
            f"Score: {res.get('score', 'N/A')}% "
            f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
            f"Message: {res.get('message', '')}"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        logger.exception("Submission failed")
        return f"Submission Failed: {e}", pd.DataFrame(logs)

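# Gradio UI: log in with Hugging Face, run the evaluation, and review the submitted
# answers. Gradio injects the OAuth profile automatically because of the
# gr.OAuthProfile type hint on run_and_submit_all.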
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Runner 🚀")
    gr.Markdown("""
**Instructions:**

1. Clone this space.
2. Add `OPENAI_API_KEY` and optionally `OPENAI_MODEL_ID` in Settings → Secrets.
3. Log in to Hugging Face.
4. Click **Run Evaluation & Submit All Answers**.
""")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Questions & Answers", wrap=True)
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

if __name__ == "__main__":
    demo.launch(debug=True, share=False)