import os
import logging
import openai
from openai import OpenAI
import gradio as gr
import requests
import pandas as pd
from smolagents import CodeAgent, DuckDuckGoSearchTool, tool
# --- Setup logging ---
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
MAX_PROMPT_LENGTH = 15000 # characters, naive cap to avoid token overflow
# --- Configure OpenAI SDK & Client ---
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Environment variable OPENAI_API_KEY is required.")
openai.api_key = openai_api_key
client = OpenAI()
OPENAI_MODEL_ID = os.getenv("OPENAI_MODEL_ID", "gpt-4.1")
# --- Adapter so CodeAgent can call the OpenAI client correctly ---
class OpenAIModelWrapper:
    """Thin adapter that exposes the OpenAI client as a plain callable for CodeAgent."""

    def __init__(self, model_id: str, client: OpenAI):
        self.model_id = model_id
        self.client = client

    def __call__(self, prompt: str, **kwargs) -> str:
        try:
            # Responses API call; `output_text` is the convenience accessor for the text output.
            resp = self.client.responses.create(
                model=self.model_id,
                input=prompt
            )
            return getattr(resp, "output_text", str(resp))
        except Exception as e:
            logger.exception("OpenAI inference error")
            return f"AGENT ERROR (inference): {e}"
# --- Tool Definitions ---
@tool
def summarize_query(query: str) -> str:
"""
Reframes an unclear search query to improve relevance.
Args:
query (str): The original search query needing refinement.
Returns:
str: A concise, improved version of the query.
"""
return f"Summarize and reframe: {query}"
@tool
def wikipedia_search(page: str) -> str:
"""
Fetches the summary extract of an English Wikipedia page via the REST API.
Args:
page (str): Title of the Wikipedia page (e.g. 'Mercedes_Sosa_discography').
Returns:
str: The page's summary (or an error message).
"""
try:
url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page}"
resp = requests.get(url, timeout=10)
resp.raise_for_status()
extract = resp.json().get("extract", "")
if not extract:
logger.warning("Wikipedia returned empty extract for %s", page)
return extract
except Exception as e:
logger.exception("Wikipedia lookup failed")
return f"Wikipedia error: {e}"
# Instantiate tools
search_tool = DuckDuckGoSearchTool()
wiki_tool = wikipedia_search
summarize_tool = summarize_query
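# DuckDuckGoSearchTool ships with smolagents; wiki_tool and summarize_tool are the
# @tool-decorated functions defined above, bound here under shorter names.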
# --- ReACT Prompt ---
instruction_prompt = """
You are a ReACT agent with three tools:
• DuckDuckGoSearchTool(query: str)
• wikipedia_search(page: str)
• summarize_query(query: str)
Internally, for each question:
1. Thought: decide which tool to call.
2. Action: call the chosen tool.
3. Observation: record the result.
4. If the observation is empty or irrelevant:
   Thought: retry with summarize_query + DuckDuckGoSearchTool.
   Record the new Observation.
5. Thought: integrate the observations.

Finally, output your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings.
If you are asked for a number, do not write it with commas and do not include units such as $ or % unless specified otherwise.
If you are asked for a string, do not use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
If you are asked for a comma-separated list, apply the above rules to each element depending on whether it is a number or a string.
"""
# --- Build CodeAgent ---
llm_wrapper = OpenAIModelWrapper(model_id=OPENAI_MODEL_ID, client=client)
smart_agent = CodeAgent(
    tools=[search_tool, wiki_tool, summarize_tool],
    model=llm_wrapper,
)
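# Note: the wrapper above is passed where CodeAgent expects its model; the agent calls
# it like a function to produce each reasoning/code step.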
# --- BasicAgent for Gradio ---
class BasicAgent:
    def __init__(self):
        logger.info("Initialized SmolAgent (%s) with ReACT & tools.", OPENAI_MODEL_ID)

    def __call__(self, question: str) -> str:
        # Validate the question
        if not question or not question.strip():
            return "AGENT ERROR: Empty question."
        # Build the prompt and apply the naive character cap
        prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
        if len(prompt) > MAX_PROMPT_LENGTH:
            prompt = prompt[:MAX_PROMPT_LENGTH]  # naive trim
            logger.warning("Prompt truncated to %d chars", MAX_PROMPT_LENGTH)
        try:
            return smart_agent.run(prompt)
        except Exception as e:
            logger.exception("Agent run error")
            return f"AGENT ERROR (run): {e}"
# --- Submission logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    if not profile:
        return "Please log in to Hugging Face.", None
    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    agent = BasicAgent()

    # Fetch questions
    try:
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
        resp.raise_for_status()
        questions = resp.json()
        if not isinstance(questions, list):
            raise ValueError("Invalid questions format")
        logger.info("Fetched %d questions", len(questions))
    except Exception as e:
        logger.exception("Failed to fetch questions")
        return f"Error fetching questions: {e}", None

    # Run agent
    logs, payload, skipped = [], [], 0
    for item in questions:
        tid = item.get("task_id")
        q = item.get("question")
        if not tid or not q:
            skipped += 1
            continue
        ans = agent(q)
        logs.append({"Task ID": tid, "Question": q, "Submitted Answer": ans})
        payload.append({"task_id": tid, "submitted_answer": ans})
    if skipped:
        logger.warning("Skipped %d malformed items", skipped)
    if not payload:
        return "Agent did not produce any answers.", pd.DataFrame(logs)

    # Submit answers
    submission = {"username": username, "agent_code": agent_code, "answers": payload}
    try:
        post = requests.post(f"{DEFAULT_API_URL}/submit", json=submission, timeout=60)
        post.raise_for_status()
        res = post.json()
        status = (
            f"Submission Successful!\n"
            f"User: {res.get('username')}\n"
            f"Score: {res.get('score', 'N/A')}% "
            f"({res.get('correct_count', '?')}/{res.get('total_attempted', '?')})\n"
            f"Message: {res.get('message', '')}"
        )
        return status, pd.DataFrame(logs)
    except Exception as e:
        logger.exception("Submission failed")
        return f"Submission Failed: {e}", pd.DataFrame(logs)
# --- Gradio UI ---
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Runner 🚀")
    gr.Markdown("""
**Instructions:**

1. Clone this space.
2. Add `OPENAI_API_KEY` and optionally `OPENAI_MODEL_ID` in Settings → Secrets.
3. Log in to Hugging Face.
4. Click **Run Evaluation & Submit All Answers**.
""")
    gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out = gr.DataFrame(label="Questions & Answers", wrap=True)
    # Gradio fills the gr.OAuthProfile parameter automatically once the user has
    # logged in via gr.LoginButton, so it is not listed in `inputs`.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])
if __name__ == "__main__":
    demo.launch(debug=True, share=False)