# NOTE: web-viewer residue (file-size banner, commit-hash gutter and line-number gutter) was removed here; it was not part of the program.
import os
import re

import gradio as gr
import openai
import pandas as pd
import requests

# --- Constants ---
# Base URL of the scoring service; the questions and submit endpoints are
# derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Secure API Key ---
# Taken from the environment rather than hard-coded; the value is None when
# OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# --- Smart Agent Logic ---
class SmartAgent:
    """Callable question-answering agent backed by the OpenAI chat API.

    Calling an instance sends the question as a single user message to
    ``gpt-3.5-turbo`` and returns the cleaned reply, or the literal string
    ``"ERROR"`` when the request or the response parsing fails.
    """

    # Matches an answer marker such as "FINAL ANSWER:", "Final Answer:" or
    # "Answer:" in any letter case, with optional spaces before the colon.
    _ANSWER_MARKER = re.compile(r"\b(?:final\s+)?answer\s*:", re.IGNORECASE)

    def __init__(self):
        print("SmartAgent initialized using OpenAI.")

    def __call__(self, question: str) -> str:
        """Answer *question* with one chat-completion request.

        Args:
            question: Question text, sent verbatim as the user message.

        Returns:
            The model reply passed through :meth:`clean_answer`, or the
            literal ``"ERROR"`` if any exception is raised along the way.
        """
        # Only the first 100 characters are logged to keep the output short.
        print(f"Question received: {question[:100]}")
        try:
            # NOTE(review): ``openai.ChatCompletion`` is the pre-1.0 SDK
            # interface -- confirm the installed ``openai`` version provides it.
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": question}],
                temperature=0.2,
                max_tokens=100
            )
            answer = response["choices"][0]["message"]["content"].strip()
            print(f"Answer: {answer}")
            return self.clean_answer(answer)
        except Exception as e:
            # Broad catch: every failure is reported as the "ERROR" sentinel
            # instead of propagating to the caller.
            print(f"Error: {e}")
            return "ERROR"

    def clean_answer(self, answer: str) -> str:
        """Return the bare answer text with any answer marker removed.

        The reply is split on answer markers ("FINAL ANSWER:", "Answer:", in
        any letter case) and the last non-empty piece is returned, so
        ``"Final Answer: 42"`` yields ``"42"`` rather than ``"Final  42"``.
        A reply without a marker is returned stripped of surrounding
        whitespace.

        Args:
            answer: Raw reply text from the model.

        Returns:
            The cleaned answer; an empty string when nothing but markers and
            whitespace is present.
        """
        pieces = [piece.strip() for piece in self._ANSWER_MARKER.split(answer)]
        non_empty = [piece for piece in pieces if piece]
        return non_empty[-1] if non_empty else ""

# --- Evaluation and Submission Logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: Hugging Face OAuth profile of the logged-in user, or
            ``None`` when nobody is logged in.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message;
        ``results`` is a ``pandas.DataFrame`` with one row per processed
        question, or ``None`` when the run stops before any question is
        processed (not logged in, or the question fetch failed).
    """
    space_id = os.getenv("SPACE_ID")

    if not profile:
        return "Please log in to Hugging Face using the button above.", None
    username = profile.username
    print(f"Logged in as: {username}")

    # NOTE(review): when SPACE_ID is unset this link contains the literal
    # text "None"; confirm whether the scoring service accepts that.
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        # Anything other than a list (e.g. a JSON error object) would crash
        # the loop below, so reject it here with the regular failure message.
        if not isinstance(questions_data, list):
            raise ValueError(
                f"expected a list of questions, got {type(questions_data).__name__}"
            )
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    agent = SmartAgent()
    results_log = []
    answers_payload = []

    for item in questions_data:
        # Skip malformed entries instead of failing on ``.get``.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question = item.get("question")
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            # A failed question is shown in the results table but is not
            # added to the submission payload.
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"ERROR: {e}"})

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload
    }

    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        summary = (
            "✅ Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')} / {result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message', '')}"
        )
        return summary, pd.DataFrame(results_log)
    except Exception as e:
        return f"❌ Submission failed: {e}", pd.DataFrame(results_log)

# --- UI ---
# Build the page: title, instructions, login button, run button and outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 GAIA Smart Agent Evaluation")
    gr.Markdown(
        """
        1. Login to Hugging Face.
        2. Click "Run Evaluation" to evaluate your OpenAI-powered agent.
        3. View your score on the leaderboard (requires public repo).
        """
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Agent Answers")

    # No explicit inputs are wired; presumably Gradio supplies the OAuth
    # profile from the handler's type annotation -- confirm against the
    # Gradio OAuth documentation.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    # Report the Space-related environment variables before launching, so the
    # startup log shows whether the app is running on a Space or locally.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): this appends ".hf.space" to SPACE_HOST -- confirm the
        # variable does not already include the domain.
        print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Separator line sized as 60 dashes plus the width of " App Starting ".
    print("-"*(60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)