File size: 5,045 Bytes
10e9b7d eccf8e4 3c4371f 7041928 10e9b7d e80aab9 3db6293 e80aab9 7041928 31243f4 7041928 31243f4 7041928 3c4371f 7e4a06b 7041928 7e4a06b 7041928 3c4371f 7041928 7e4a06b 31243f4 e80aab9 eccf8e4 31243f4 7d65c66 31243f4 7d65c66 7041928 e80aab9 7041928 7d65c66 7041928 31243f4 7041928 31243f4 7041928 31243f4 7041928 31243f4 7041928 e80aab9 7d65c66 e80aab9 7041928 e80aab9 7041928 e80aab9 7041928 7d65c66 7041928 e80aab9 7041928 e80aab9 7041928 0ee0419 e514fd7 7041928 e514fd7 e80aab9 7e4a06b 31243f4 7041928 e80aab9 7041928 e80aab9 7041928 3c4371f 7d65c66 3c4371f 7d65c66 3c4371f 7d65c66 3c4371f 31243f4 3c4371f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
import gradio as gr
import requests
import pandas as pd
import openai
# --- Constants ---
# Base URL of the scoring service; run_and_submit_all appends "/questions"
# and "/submit" to it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Secure API Key ---
# The key is read from the environment rather than hard-coded.
# os.getenv returns None when OPENAI_API_KEY is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
# --- Smart Agent Logic ---
class SmartAgent:
    """Callable agent that answers a question with one OpenAI chat completion."""

    def __init__(self):
        # Client for openai>=1.0, created on first use; stays None on the
        # legacy (<1.0) library, which has no client object.
        self._client = None
        print("SmartAgent initialized using OpenAI.")

    def __call__(self, question: str) -> str:
        """Return the model's cleaned answer to *question*.

        Any exception raised while requesting or reading the completion is
        printed and the literal string ``"ERROR"`` is returned instead of
        the exception being raised.
        """
        print(f"Question received: {question[:100]}")
        request = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": question}],
            "temperature": 0.2,
            "max_tokens": 100,
        }
        try:
            if hasattr(openai, "OpenAI"):
                # openai>=1.0 removed openai.ChatCompletion; requests go
                # through a client object and responses are attribute-based.
                if self._client is None:
                    self._client = openai.OpenAI(api_key=openai.api_key)
                response = self._client.chat.completions.create(**request)
                content = response.choices[0].message.content
            else:
                # Legacy (<1.0) module-level API with dict-style responses.
                response = openai.ChatCompletion.create(**request)
                content = response["choices"][0]["message"]["content"]
            answer = content.strip()
            print(f"Answer: {answer}")
            return self.clean_answer(answer)
        except Exception as e:
            print(f"Error: {e}")
            return "ERROR"

    def clean_answer(self, answer: str) -> str:
        """Remove every "FINAL ANSWER:" / "Answer:" label and trim whitespace."""
        cleaned = answer.strip()
        for label in ("FINAL ANSWER:", "Answer:"):
            cleaned = cleaned.replace(label, "")
        return cleaned.strip()
# --- Evaluation and Submission Logic ---
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each with SmartAgent, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in.

    Returns:
        A ``(status, results)`` pair. ``status`` is a human-readable message.
        ``results`` is a DataFrame with one row per attempted question, or
        None when the run stops before any question is attempted (not logged
        in, or the questions could not be fetched).
    """
    if not profile:
        return "Please log in to Hugging Face using the button above.", None
    username = profile.username
    print(f"Logged in as: {username}")

    # NOTE(review): when SPACE_ID is unset this link contains the text
    # "None" -- confirm whether the scoring service accepts that.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    agent = SmartAgent()
    results_log = []
    answers_payload = []
    for item in questions_data:
        task_id = item.get("task_id")
        question = item.get("question")
        # Entries without both an id and a question text are skipped.
        if not task_id or not question:
            continue
        try:
            answer = agent(question)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append(
                {"Task ID": task_id, "Question": question, "Submitted Answer": answer}
            )
        except Exception as e:
            # A failed question is shown in the results table but not submitted.
            results_log.append(
                {"Task ID": task_id, "Question": question, "Submitted Answer": f"ERROR: {e}"}
            )

    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        # Status markers are written as escapes (U+2705 check mark, U+274C
        # cross mark) so the literals survive re-encoding of this file.
        summary = (
            f"\u2705 Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Score: {result_data.get('score')}%\n"
            f"Correct: {result_data.get('correct_count')} / {result_data.get('total_attempted')}\n"
            f"Message: {result_data.get('message', '')}"
        )
        return summary, pd.DataFrame(results_log)
    except Exception as e:
        return f"\u274c Submission failed: {e}", pd.DataFrame(results_log)
# --- UI ---
# Page layout: heading, instructions, login button, run button, then the
# status box and results table that run_and_submit_all fills in.
with gr.Blocks() as demo:
    # U+1F916 (robot face) is written as an escape so the heading survives
    # re-encoding of this file.
    gr.Markdown("# \U0001F916 GAIA Smart Agent Evaluation")
    gr.Markdown(
        "\n"
        "1. Login to Hugging Face.\n"
        "2. Click \"Run Evaluation\" to evaluate your OpenAI-powered agent.\n"
        "3. View your score on the leaderboard (requires public repo).\n"
    )
    gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Status", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Agent Answers")
    # No explicit inputs are wired. NOTE(review): Gradio is expected to supply
    # the OAuth profile from the handler's type annotation -- confirm against
    # the Gradio version in use.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
# Script entry point: print the Space-related environment variables, then
# launch the Gradio app.
if __name__ == "__main__":
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup

    # Markers are written as escapes (U+2705 check mark, U+2139 U+FE0F
    # information sign) so the literals survive re-encoding of this file.
    if space_host_startup:
        print(f"\u2705 SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("\u2139\ufe0f SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"\u2705 SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("\u2139\ufe0f SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)