schoolkithub committed on
Commit
779b7ec
·
verified ·
1 Parent(s): 2531dc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -125
app.py CHANGED
@@ -1,138 +1,182 @@
1
- import gradio as gr
2
- import json
3
  import os
4
- from datetime import datetime
 
 
 
5
  from agent import GAIAAgent
6
- from evaluate import evaluate_agent, create_sample_dataset
7
- import traceback
 
 
 
 
 
 
 
 
 
8
 
9
def run_evaluation():
    """Run the GAIA evaluation and return results.

    Returns:
        A ``(report, score)`` tuple. ``report`` is a Markdown string: either
        the formatted results or, if anything raised, an error report that
        includes the traceback. ``score`` is the value returned by
        ``evaluate_agent`` on success and ``0.0`` on failure.
    """
    # Number of characters of the submission file shown in the report.
    preview_limit = 500

    try:
        print("Starting GAIA Agent Evaluation...")
        print("=" * 50)

        # Initialize agent
        agent = GAIAAgent()

        # Test API connection first
        print("Testing xAI API connection...")
        test_response = agent.test_grok()
        print(f"API Test Response: {test_response}")

        # Run evaluation on sample dataset (since we don't have the full GAIA dataset)
        print("\nRunning evaluation on sample tasks...")
        score = evaluate_agent(dataset_path=None, max_tasks=10)

        # Read only as much of the submission file as the preview needs
        # (one extra character tells us whether to append the ellipsis).
        # NOTE(review): presumably evaluate_agent writes submission.jsonl; a
        # stale file from an earlier run would be previewed as well - confirm.
        submission_content = ""
        if os.path.exists("submission.jsonl"):
            with open("submission.jsonl", "r", encoding="utf-8") as f:
                submission_content = f.read(preview_limit + 1)

        preview = submission_content[:preview_limit]
        ellipsis = '...' if len(submission_content) > preview_limit else ''

        # Format results
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        passed = score >= 30
        certificate_status = '✅ ACHIEVED (≥30%)' if passed else '❌ NOT ACHIEVED (<30%)'
        next_steps = (
            '🎉 Congratulations! You can now claim your Certificate of Excellence!'
            if passed
            else '💪 Keep improving your agent to reach the 30% threshold.'
        )

        # The Markdown body is kept flush-left so no line is rendered as an
        # indented code block.
        results = f"""
# GAIA Agent Evaluation Results

**Timestamp:** {timestamp}
**Final Score:** {score:.2f}%
**Certificate Status:** {certificate_status}

## API Connection Status
{test_response}

## Submission File Preview
```json
{preview}{ellipsis}
```

## Next Steps
{next_steps}
"""

        return results, score

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure instead
        # of letting it propagate.
        error_msg = f"""
# Evaluation Error

**Error:** {str(e)}

**Traceback:**
```
{traceback.format_exc()}
```

Please check the logs and fix any issues before retrying.
"""
        return error_msg, 0.0
71
-
72
def create_interface():
    """Create the Gradio interface.

    Builds a Blocks page with an intro, a run button, a Markdown report
    area, a numeric score field and a footer, and wires the button to
    ``run_evaluation``. The app is returned without being launched.
    """
    with gr.Blocks(title="GAIA Agent Evaluation", theme=gr.themes.Soft()) as demo:
        # Intro text shown at the top of the page.
        gr.Markdown("""
        # 🤖 GAIA Agent Evaluation

        This is your GAIA benchmark agent for the Hugging Face Agents Course Certificate of Excellence.

        **Goal:** Achieve ≥30% score on GAIA benchmark tasks

        Click the button below to run the evaluation and submit your answers.

        ⚠️ **Note:** This may take several minutes to complete. Please be patient.
        """)

        with gr.Row():
            start_button = gr.Button(
                "🚀 Run Evaluation & Submit All Answers",
                variant="primary",
                size="lg",
            )

        # Two columns: the Markdown report on one side, the score on the other.
        with gr.Row():
            with gr.Column():
                gr.Markdown("## Run Status / Submission Result")
                report_view = gr.Markdown("Click the button above to start evaluation...")

            with gr.Column():
                gr.Markdown("## Score")
                score_view = gr.Number(label="Final Score (%)", value=0.0, interactive=False)

        # Event handler: run_evaluation's two return values fill the two outputs.
        start_button.click(
            fn=run_evaluation,
            inputs=[],
            outputs=[report_view, score_view],
            show_progress=True
        )

        # Static footer.
        gr.Markdown("""
        ---

        ## About This Agent

        - **API:** xAI Grok for reasoning
        - **Tools:** Web search, file handling, math calculations
        - **Fallbacks:** Local knowledge for common questions
        - **Target:** 30% accuracy for certificate eligibility

        ## Troubleshooting

        If you encounter issues:
        1. Check the container logs in the "Logs" tab
        2. Verify API credentials and internet connectivity
        3. Ensure all dependencies are installed

        **Good luck! 🍀**
        """)

    return demo
129
 
130
if __name__ == "__main__":
    # Create and launch the interface: listen on all interfaces at port 7860,
    # show errors in the UI and keep the API page hidden.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "show_api": False,
    }
    demo = create_interface()
    demo.launch(**launch_options)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ import requests
4
+ import inspect
5
+ import pandas as pd
6
  from agent import GAIAAgent
7
+ from evaluate import extract_final_answer
8
+
9
+ # --- Constants ---
10
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
+
12
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetches all questions, runs the GAIAAgent on them, submits all answers,
    and displays the results.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in. The click handler passes no explicit
            inputs, so this is presumably supplied by Gradio from the login
            state - confirm against the Gradio OAuth documentation.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a pandas DataFrame with one row per processed question
        ("Task ID", "Question", "Submitted Answer"), or None when the run
        stopped before any question was processed.
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    print(f"User logged in: {username}")

    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # 1. Instantiate Agent
    try:
        agent = GAIAAgent()
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None

    # Link to this Space's code; it is sent along with the submission.
    # NOTE(review): when SPACE_ID is unset (e.g. a local run) this becomes
    # ".../spaces/None/tree/main" - confirm whether the scoring API minds.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except requests.exceptions.JSONDecodeError as e:
        # Must precede RequestException: requests' JSONDecodeError derives
        # from it, so the broader handler would otherwise shadow this one.
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run Your Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        # A malformed entry is skipped rather than aborting the whole run.
        if not isinstance(item, dict):
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            # Process task with GAIAAgent
            # NOTE(review): file_name is always sent as None, so any file
            # reference carried by the question item is dropped - confirm
            # this is intended for GAIAAgent.process_task.
            agent_output = agent.process_task(
                {"task_id": task_id, "question": question_text, "file_name": None}
            )
            submitted_answer = extract_final_answer(agent_output)
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append(
                {"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer}
            )
        except Exception as e:
            # A failed task is logged for display but not submitted.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append(
                {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"}
            )

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        status_message = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
    except requests.exceptions.JSONDecodeError as e:
        # The request went through but the body was not JSON; report it as
        # such instead of as a network error (handled before the base class).
        status_message = f"Submission Failed: Could not decode server response - {e}"
        print(status_message)
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)

    # Every outcome of the submit step shows the per-question log.
    return status_message, pd.DataFrame(results_log)
132
+
133
+ # --- Build Gradio Interface using Blocks ---
134
with gr.Blocks() as demo:
    # Page title and usage notes.
    gr.Markdown(value="# 🤖 GAIA Agent Evaluation")
    gr.Markdown(
        value="""
        **Instructions:**
        1. Please clone this space, then modify the code to define your agent's logic, tools, and packages.
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the 'submit' button, it can take quite some time (this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution.
        """
    )

    # Hugging Face sign-in; run_and_submit_all refuses to run without a profile.
    gr.LoginButton()

    run_button = gr.Button(value="🚀 Run Evaluation & Submit All Answers")

    # Outputs: a read-only status box and a table of per-question answers.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # The handler's (status, results) return pair maps onto the two outputs.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
161
 
162
if __name__ == "__main__":
    # Startup banner: report the Space-related environment, then launch.
    banner_title = " App Starting "
    print("\n" + "-" * 30 + banner_title + "-" * 30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # SPACE_HOST may already include the ".hf.space" domain; strip it
        # before re-adding so the printed URL never doubles the suffix.
        host_label = space_host_startup.removesuffix(".hf.space")
        print(f" Runtime URL should be: https://{host_label}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    # Closing rule with the same width as the opening banner line.
    print("-" * (60 + len(banner_title)) + "\n")
    print("Launching Gradio Interface for GAIA Agent Evaluation...")
    demo.launch(debug=True, share=False)