jesusgj committed on
Commit
423cd4c
·
1 Parent(s): a0b0035

Modified files

Browse files
Files changed (1) hide show
  1. app.py +102 -101
app.py CHANGED
@@ -1,75 +1,34 @@
1
  import os
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
6
 
7
- # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
- """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
- and displays the results.
26
- """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
-
30
- if profile:
31
- username= f"{profile.username}"
32
- print(f"User logged in: {username}")
33
- else:
34
- print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
-
37
- api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
39
- submit_url = f"{api_url}/submit"
40
-
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
- try:
43
- agent = BasicAgent()
44
- except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
- return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
-
51
- # 2. Fetch Questions
52
  print(f"Fetching questions from: {questions_url}")
53
  try:
54
  response = requests.get(questions_url, timeout=15)
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
 
61
  except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
- return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
- return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
- # 3. Run your Agent
 
73
  results_log = []
74
  answers_payload = []
75
  print(f"Running agent on {len(questions_data)} questions...")
@@ -84,34 +43,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
-
90
- if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
-
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
 
99
- # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
 
 
 
101
  try:
102
  response = requests.post(submit_url, json=submission_data, timeout=60)
103
  response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
@@ -119,40 +63,98 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
  except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
  except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
  except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
 
 
 
 
 
 
 
 
 
136
  except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
  print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
 
 
142
 
143
- # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
148
  **Instructions:**
149
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
150
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
151
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
152
  ---
153
- **Disclaimers:**
154
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
155
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
156
  """
157
  )
158
 
@@ -161,7 +163,6 @@ with gr.Blocks() as demo:
161
  run_button = gr.Button("Run Evaluation & Submit All Answers")
162
 
163
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
164
- # Removed max_rows=10 from DataFrame constructor
165
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
166
 
167
  run_button.click(
@@ -173,7 +174,7 @@ if __name__ == "__main__":
173
  print("\n" + "-"*30 + " App Starting " + "-"*30)
174
  # Check for SPACE_HOST and SPACE_ID at startup for information
175
  space_host_startup = os.getenv("SPACE_HOST")
176
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
177
 
178
  if space_host_startup:
179
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -181,7 +182,7 @@ if __name__ == "__main__":
181
  else:
182
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
183
 
184
- if space_id_startup: # Print repo URLs if SPACE_ID is found
185
  print(f"✅ SPACE_ID found: {space_id_startup}")
186
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
187
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
1
  import os
2
  import gradio as gr
3
  import requests
 
4
  import pandas as pd
5
+ from agent import initialize_agent # Import the agent initialization function
6
 
 
7
  # --- Constants ---
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
10
+ # --- Helper Functions ---
11
def _fetch_questions(api_url: str) -> list:
    """Fetch the evaluation questions from the scoring API.

    Args:
        api_url: Base URL of the scoring service; ``/questions`` is appended.

    Returns:
        The decoded, non-empty JSON payload returned by the endpoint.

    Raises:
        RuntimeError: If the request fails, the body is not valid JSON, or
            the decoded payload is empty.
    """
    questions_url = f"{api_url}/questions"
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    # requests' JSONDecodeError derives from RequestException, so it must be
    # handled first; listed after the generic handler it could never run.
    except requests.exceptions.JSONDecodeError as e:
        raise RuntimeError(
            f"Error decoding JSON response from questions endpoint: {e}. "
            f"Response: {response.text[:500]}"
        ) from e
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Error fetching questions: {e}") from e
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred fetching questions: {e}") from e

    # Checked outside the try block so the message is not re-wrapped by the
    # catch-all handler above as an "unexpected error".
    if not questions_data:
        raise RuntimeError("Fetched questions list is empty or invalid format.")

    print(f"Fetched {len(questions_data)} questions.")
    return questions_data
 
29
 
30
+ def _run_agent_on_questions(agent, questions_data: list) -> tuple[list, list]:
31
+ """Runs the agent on each question and collects answers and logs."""
32
  results_log = []
33
  answers_payload = []
34
  print(f"Running agent on {len(questions_data)} questions...")
 
43
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
44
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
45
  except Exception as e:
46
+ print(f"Error running agent on task {task_id}: {e}")
47
+ results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
48
+ return answers_payload, results_log
 
 
 
 
 
 
 
 
49
 
50
def _submit_answers(api_url: str, username: str, agent_code_url: str, answers_payload: list) -> dict:
    """Submit the agent's answers to the evaluation API.

    Args:
        api_url: Base URL of the scoring service; ``/submit`` is appended.
        username: Hugging Face username; surrounding whitespace is stripped
            before it is sent.
        agent_code_url: URL sent as the ``agent_code`` field.
        answers_payload: Answer entries sent as the ``answers`` field.

    Returns:
        The decoded JSON body of the submission response.

    Raises:
        RuntimeError: On an HTTP error status, a timeout, any other request
            failure, or an unexpected error. The original exception is
            chained as ``__cause__``.
    """
    submit_url = f"{api_url}/submit"
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code_url,
        "answers": answers_payload,
    }
    print(f"Submitting {len(answers_payload)} answers for user '{username}' to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            # NOTE(review): this line is hidden by the diff view; restored from
            # how error_json is used on the next line - confirm against app.py.
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            error_detail += f" Response: {e.response.text[:500]}"
        raise RuntimeError(f"Submission Failed: {error_detail}") from e
    except requests.exceptions.Timeout as e:
        # The exception must be bound here: without "as e" the "from e" below
        # raised UnboundLocalError instead of the intended timeout message.
        raise RuntimeError("Submission Failed: The request timed out.") from e
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Submission Failed: Network error - {e}") from e
    except Exception as e:
        raise RuntimeError(f"An unexpected error occurred during submission: {e}") from e
73
+
74
+ # --- Main Gradio Function ---
75
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, run the agent on them, and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is ``None`` when
        the run is rejected before it starts (no login, no username, or no
        ``SPACE_ID``); otherwise it is a DataFrame holding either the
        per-question log or a single row describing the error.
    """
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = profile.username
    print(f"User logged in: {username}")
    if not username:
        return "Hugging Face username not found. Please ensure you are logged in.", None

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        print("SPACE_ID environment variable not found. Cannot determine agent_code URL.")
        return "Error: SPACE_ID not set. Cannot determine agent_code URL.", None
    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Stays None until the agent has actually run, so the error handler can
    # tell "failed before any question was processed" from "failed later".
    results_log = None

    try:
        # 1. Instantiate Agent
        print("Initializing agent...")
        agent = initialize_agent()
        if agent is None:
            raise RuntimeError("Agent initialization failed. Check agent.py for details.")
        print("Agent initialized successfully.")

        # 2. Fetch Questions
        questions_data = _fetch_questions(DEFAULT_API_URL)

        # 3. Run Agent on Questions
        answers_payload, results_log = _run_agent_on_questions(agent, questions_data)
        if not answers_payload:
            return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

        # 4. Submit Answers
        submission_result = _submit_answers(DEFAULT_API_URL, username, agent_code_url, answers_payload)

        status_message = (
            f"Submission Successful!\n"
            f"User: {submission_result.get('username')}\n"
            f"Overall Score: {submission_result.get('score', 'N/A')}% "
            f"({submission_result.get('correct_count', '?')}/{submission_result.get('total_attempted', '?')} correct)\n"
            f"Message: {submission_result.get('message', 'No message received.')}"
        )
        results_df = pd.DataFrame(results_log)

    except RuntimeError as e:
        status_message = f"Operation Failed: {e}"
        print(status_message)
        # Show whatever the agent produced if it ran; otherwise show the error.
        if results_log is not None:
            results_df = pd.DataFrame(results_log)
        else:
            results_df = pd.DataFrame([{"Status": "Error", "Details": str(e)}])
    except Exception as e:
        status_message = f"An unexpected critical error occurred: {e}"
        print(status_message)
        results_df = pd.DataFrame([{"Status": "Critical Error", "Details": str(e)}])

    return status_message, results_df
144
 
145
+ # --- Gradio Interface Definition ---
146
  with gr.Blocks() as demo:
147
+ gr.Markdown("# GAIA Benchmark Evaluation with smolagent")
148
  gr.Markdown(
149
  """
150
  **Instructions:**
151
+ 1. Clone this Space and modify `agent.py` to define your agent's logic, tools, and necessary packages.
152
+ 2. Log in to your Hugging Face account using the button below. Your HF username will be used for submission.
153
+ 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see your score.
154
  ---
155
+ **Important Notes:**
156
+ * The evaluation process can take some time as the agent processes all questions.
157
+ * This Space provides a basic setup. You are encouraged to develop a more robust solution (e.g., caching answers, asynchronous processing) for production use.
158
  """
159
  )
160
 
 
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
166
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
167
 
168
  run_button.click(
 
174
  print("\n" + "-"*30 + " App Starting " + "-"*30)
175
  # Check for SPACE_HOST and SPACE_ID at startup for information
176
  space_host_startup = os.getenv("SPACE_HOST")
177
+ space_id_startup = os.getenv("SPACE_ID")
178
 
179
  if space_host_startup:
180
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
182
  else:
183
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
184
 
185
+ if space_id_startup:
186
  print(f"✅ SPACE_ID found: {space_id_startup}")
187
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
188
  print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")