File size: 4,987 Bytes
0412d41
aee5bda
 
f1dea33
0412d41
 
aee5bda
0412d41
aee5bda
 
0af627b
aee5bda
f1dea33
aee5bda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ce2d7e
aee5bda
2ce2d7e
aee5bda
2ce2d7e
 
 
 
aee5bda
 
 
 
0412d41
 
 
aee5bda
0412d41
 
aee5bda
 
 
 
63d6133
aee5bda
 
 
 
 
63d6133
 
0412d41
aee5bda
 
0412d41
2ae2c93
aee5bda
 
2ae2c93
63d6133
a058371
aee5bda
0412d41
2ae2c93
0412d41
aee5bda
a058371
aee5bda
0412d41
 
 
aee5bda
 
a058371
 
aee5bda
a058371
0412d41
63d6133
aee5bda
a058371
 
 
aee5bda
 
 
 
0412d41
aee5bda
2ae2c93
 
a058371
aee5bda
 
 
 
 
0412d41
a058371
0412d41
a058371
0412d41
63d6133
aee5bda
0412d41
aee5bda
63d6133
 
aee5bda
 
 
 
 
 
63d6133
 
0412d41
aee5bda
63d6133
aee5bda
63d6133
 
0412d41
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import re

import gradio as gr
import openai                              # ← official OpenAI client
import pandas as pd
import requests
from openai import OpenAI
from smolagents import DuckDuckGoSearchTool, tool

# --- Constants ---
# Base URL of the scoring service; the questions and submit endpoints used
# further down are built by appending a path to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Configure OpenAI SDK & Client ---

# Fail at import time when the key is missing or empty, so a misconfigured
# deployment is noticed immediately rather than on the first request.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError("Set OPENAI_API_KEY in your Space secrets or env!")

# The key is set on the SDK module and a client object is created with no
# explicit arguments.
openai.api_key = openai_api_key
client = OpenAI()  # new client object

# --- Tool Definitions ---

# NOTE(review): smolagents' `@tool` decorator is understood to derive the
# tool's description and argument schema from the signature and docstring
# below, which would make the docstring runtime text — confirm before
# rewording it.
# The body performs no summarization itself: it only wraps the query in a
# fixed "Summarize and reframe: ..." instruction string.
# Nothing in the visible part of this file references this tool.
@tool
def summarize_query(query: str) -> str:
    """
    Provides a structured summary to reframe a query if search results are unclear or poor.

    Args:
        query (str): The search query that needs summarization.
    Returns:
        str: A concise summary of key facts about the given query.
    """
    # Returns an instruction-style string, not a computed summary.
    return f"Summarize and reframe: {query}"

# Web-search tool instance from smolagents. No other code visible in this
# file references it.
search_tool = DuckDuckGoSearchTool()

# --- ReACT + Scratchpad + Auto‐Retry Instruction Prompt ---

# Instruction text that BasicAgent.__call__ strips and places in front of
# every question. It is sent to the model verbatim, so any edit to the text
# below changes model behavior.
instruction_prompt = """

You are a high-precision AI agent. Internally, you may follow the ReACT pattern—thinking step-by-step, invoking tools, observing results, retrying if needed—but you must NOT show any of that. Instead, after you finish reasoning privately, output **exactly** one line:

FINAL ANSWER: [your concise answer]

Rules for the final answer:
- If it’s a number, output only the digits (no commas, units, or extra text).
- If it’s a list, output a comma-separated list with no extra punctuation or articles.
- If it’s a string, output only the words, no “um,” “the,” or other fillers.

"""

# --- BasicAgent using the new OpenAI client ---

class BasicAgent:
    """Single-shot question answerer backed by the OpenAI Responses API.

    Each call sends the module-level ``instruction_prompt`` followed by the
    question to the ``gpt-4.1`` model through the module-level ``client`` and
    returns the bare answer text. This class does not invoke any tools.
    """

    # The instruction prompt asks the model to reply "FINAL ANSWER: ...".
    # The marker must not be part of the submitted answer, so it is located
    # (tolerating case and spacing differences) and removed.
    _FINAL_ANSWER_RE = re.compile(r"FINAL\s+ANSWER\s*:", re.IGNORECASE)

    def __init__(self):
        print("SmolAgent (GPT-4.1) with ReACT, Scratchpad & Retry initialized.")

    @classmethod
    def _extract_final_answer(cls, text: str) -> str:
        """Return the answer that follows the last ``FINAL ANSWER:`` marker.

        Args:
            text: Raw model output; ``None`` or empty is treated as "".

        Returns:
            The text after the last marker with surrounding whitespace
            removed, or the whole stripped text when no marker is present.
        """
        # re.split leaves the text after the last marker as the final
        # element; with no marker the single element is the whole text.
        return cls._FINAL_ANSWER_RE.split(text or "")[-1].strip()

    def __call__(self, question: str) -> str:
        """Answer one question.

        Args:
            question: The question text.

        Returns:
            The model's answer without the ``FINAL ANSWER:`` marker, or a
            string starting with ``AGENT ERROR:`` when the API call fails.
        """
        # Build the full prompt
        prompt = instruction_prompt.strip() + "\n\nQUESTION: " + question.strip()
        print(f"Agent prompt (first 150 chars): {prompt[:150]}…")
        # Call GPT-4.1 via the new client.responses.create API
        try:
            response = client.responses.create(
                model="gpt-4.1",
                input=prompt
            )
            raw_text = response.output_text
        except Exception as e:
            # Best-effort: a failed call becomes the answer text so the rest
            # of the batch can still be processed by the caller.
            return f"AGENT ERROR: {e}"
        return self._extract_final_answer(raw_text)

# --- Gradio / HF‐Spaces submission logic ---

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch every question, answer each with BasicAgent, and submit the batch.

    Args:
        profile: The logged-in user's profile, or None when nobody is logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message;
        ``table`` is a DataFrame of the questions and submitted answers, or
        None when the run stopped before any question was processed.
    """
    if not profile:
        return "Please log in to Hugging Face using the login button above.", None

    username = profile.username
    space_id = os.getenv("SPACE_ID", "")
    agent = BasicAgent()
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # 1. Fetch questions
    questions_url = f"{DEFAULT_API_URL}/questions"
    try:
        questions_response = requests.get(questions_url, timeout=15)
        questions_response.raise_for_status()
        questions = questions_response.json() or []
    except Exception as fetch_error:
        return f"Error fetching questions: {fetch_error}", None

    # 2. Run agent on each question
    answer_log = []
    answers = []
    for entry in questions:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        # Entries lacking a task id or a question are ignored.
        if task_id and question_text is not None:
            answer = agent(question_text)
            answer_log.append(
                {"Task ID": task_id, "Question": question_text, "Submitted Answer": answer}
            )
            answers.append({"task_id": task_id, "submitted_answer": answer})

    if not answers:
        return "Agent did not produce any answers.", pd.DataFrame(answer_log)

    # 3. Submit answers
    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers,
    }
    try:
        submit_response = requests.post(
            f"{DEFAULT_API_URL}/submit", json=submission, timeout=60
        )
        submit_response.raise_for_status()
        result = submit_response.json()
        status_lines = [
            "Submission Successful!",
            f"User: {result.get('username')}",
            f"Overall Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')})",
            f"Message: {result.get('message', '')}",
        ]
        return "\n".join(status_lines), pd.DataFrame(answer_log)
    except Exception as submit_error:
        return f"Submission Failed: {submit_error}", pd.DataFrame(answer_log)

# --- Gradio Interface ---

# Build the Gradio UI: a title, usage instructions, a login button, the run
# button, and two outputs (status text and the questions/answers table).
# NOTE(review): components are presumably laid out in creation order, so the
# statement order below is significant — confirm before reordering.
with gr.Blocks() as demo:
    gr.Markdown("# SmolAgent GAIA Runner (GPT-4.1) 🚀")
    gr.Markdown(
        """
        **Instructions:**
        1. Clone this space.
        2. In Settings → Secrets add `OPENAI_API_KEY`.
        3. Log in to Hugging Face.
        4. Click **Run Evaluation & Submit All Answers**.
        **Note:** Evaluation may take several minutes.
        """
    )
    # run_and_submit_all returns a "please log in" message when it receives
    # no profile, so this button is required for a real run.
    gr.LoginButton()
    run_btn    = gr.Button("Run Evaluation & Submit All Answers")
    status_out = gr.Textbox(label="Status", lines=5, interactive=False)
    table_out  = gr.DataFrame(label="Questions & Answers", wrap=True)

    # The handler's two return values map to status_out and table_out.
    # No `inputs` are wired here. NOTE(review): presumably Gradio supplies the
    # gr.OAuthProfile argument based on the handler's type annotation — confirm.
    run_btn.click(fn=run_and_submit_all, outputs=[status_out, table_out])

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)