Artem Zhirkevich committed on
Commit
f590bb2
·
1 Parent(s): 81917a3

new version

Browse files
Files changed (7) hide show
  1. .gitignore +6 -0
  2. agent.py +334 -0
  3. app.py +52 -47
  4. dry_run.py +60 -0
  5. evaluation_api.py +30 -0
  6. requirements.txt +29 -1
  7. system_prompt.txt +8 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ .env
4
+
5
+ # https://huggingface.co/datasets/gaia-benchmark/GAIA/tree/main/2023/validation
6
+ gaia_2023_set/
agent.py ADDED
@@ -0,0 +1,334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import tempfile
4
+ import requests
5
+ import pytesseract
6
+ import wikipedia
7
+ import mwclient
8
+ import pandas as pd
9
+ import easyocr
10
+ from typing import List, Optional, Dict, Any
11
+ from urllib.parse import urlparse
12
+ from dotenv import load_dotenv
13
+ from PIL import Image
14
+ from tavily import TavilyClient
15
+ from arxiv import Search, Client, SortCriterion, SortOrder
16
+
17
+ from langgraph.graph.state import CompiledStateGraph
18
+ from langgraph.graph import START, StateGraph, MessagesState
19
+ from langgraph.prebuilt import tools_condition
20
+ from langgraph.prebuilt import ToolNode
21
+
22
+ from langchain_groq import ChatGroq
23
+ from langchain_core.messages import HumanMessage, SystemMessage
24
+ from langchain_google_genai import ChatGoogleGenerativeAI
25
+ from langchain.memory import ConversationBufferMemory
26
+ from langchain.tools import Tool, tool
27
+ from langchain.callbacks.tracers import ConsoleCallbackHandler
28
+ from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
29
+ from langchain_community.utilities import WikipediaAPIWrapper
30
+ from langchain_experimental.utilities import PythonREPL
31
+ from langchain_community.document_loaders import WebBaseLoader
32
+
33
+
34
# Read variables from a local .env file into the process environment before
# any API key is looked up.
load_dotenv()

# Groq-hosted multimodal chat model used by the image tool defined in this module.
vision_llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    groq_api_key=os.getenv('GROQ_API_KEY'),
)
37
+
38
+
39
@tool
def web_search(query: str, domain: Optional[str] = None) -> str:
    """
    Perform a web search and return the raw results as a string.

    Args:
        query (str): The search query.
        domain (Optional[str]): If provided, restricts the search to this domain.

    Returns:
        str: Raw search results concatenated into a string.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- langchain's
    # @tool exposes it to the model as the tool description.
    try:
        # Fixed pause before every search (presumably to stay under rate limits).
        time.sleep(2)

        full_query = f"{query} site:{domain}" if domain else query
        hits = DuckDuckGoSearchAPIWrapper().results(full_query, max_results=3)
        if not hits:
            return "No results found."

        # One Title/URL/Snippet block per hit; the trailing blank line of the
        # last block is removed by strip().
        blocks = [
            f"Title: {hit['title']}\nURL: {hit['link']}\nSnippet: {hit['snippet']}\n\n"
            for hit in hits
        ]
        return "".join(blocks).strip()

    except Exception as exc:
        # Failures are reported back to the model as text, not raised.
        return f"Search error: {exc}"
69
+
70
+
71
@tool
def visit_webpage(url: str) -> str:
    """
    Fetches and loads the content of a webpage given its URL.

    Parameters:
        url (str): The URL of the webpage to be visited.

    Returns:
        str: The text content of the webpage prefixed with "Page content: ",
            or a message starting with "Error loading webpage:" on failure.
    """
    try:
        loader = WebBaseLoader(url)

        # SECURITY NOTE(review): TLS certificate verification is disabled so
        # that pages with broken certificates can still be read. This exposes
        # the request to man-in-the-middle tampering; kept for compatibility
        # with the existing behaviour -- reconsider before production use.
        loader.requests_kwargs = {'verify': False}

        docs = loader.load()
    except Exception as e:
        # Match the other tools in this module: report failures as text so the
        # model can react instead of aborting the whole run.
        return f"Error loading webpage: {e}"

    # Return the actual page text rather than the repr() of the Document list,
    # which carries escaped newlines and object noise.
    page_text = "\n\n".join(doc.page_content for doc in docs)
    return f"Page content: {page_text}"
95
+
96
+
97
@tool
def wikipedia_search(query: str, max_docs: int = 1) -> str:
    """
    Search Wikipedia using mwclient and return exactly `max_docs` results.

    Args:
        query (str): The search query.
        max_docs (int): Number of results to return. Default is 1.
    """
    # NOTE: the docstring above is kept verbatim on purpose -- langchain's
    # @tool exposes it to the model as the tool description.
    try:
        # Fixed pause before every lookup (presumably to stay under rate limits).
        time.sleep(2)
        wiki = mwclient.Site("en.wikipedia.org")

        entries = []
        for hit in wiki.search(query, limit=max_docs):
            title = hit["title"]
            try:
                # Everything up to the first blank line of the page text.
                first_paragraph = wiki.pages[title].text().split('\n\n')[0]
                url = f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"

                entries.append(
                    f"--- Result {len(entries) + 1} ---\n"
                    f"Title: {title}\n"
                    f"Summary: {first_paragraph}...\n"
                    f"URL: {url}\n\n"
                )
                # Stop as soon as the requested number of pages was rendered.
                if len(entries) >= max_docs:
                    break
            except Exception:
                # A page that cannot be fetched or rendered is skipped.
                continue

        return "".join(entries).strip() or "No valid matching pages found."

    except Exception as exc:
        return f"Wikipedia search error: {str(exc)}"
140
+
141
+
142
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extracts text from an image file, or describes the image, using a vision model.

    Args:
        image_path (str): The file path to the image
            (e.g., '/path/to/document.png').

    Returns:
        str: The model's transcription or explanation of the image. Returns an
            error message string starting with 'Error' on failure.
    """
    # Imported here because the module header does not import them; without
    # base64 in scope every call failed with a NameError that was swallowed.
    import base64
    import mimetypes

    try:
        # Fixed pause before every call (presumably to stay under rate limits).
        time.sleep(2)

        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Label the data URL with the real image type; fall back to PNG (the
        # previously hard-coded value) when the extension is unknown.
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract text or provide explanation of this image"
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        response = vision_llm.invoke(message)

        return response.content.strip()
    except Exception as e:
        # Report the failure to the caller instead of returning an empty
        # string, which is indistinguishable from "the image has no text".
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return error_msg
194
+
195
+
196
+
197
@tool
def analyze_file(file_path: str) -> str:
    """
    Load and analyze a CSV or Excel file using pandas.

    Provides basic metadata and summary statistics for numeric columns.

    Args:
        file_path (str): Path to the CSV or Excel file.

    Returns:
        str: Summary statistics, the column names and up to the first 1000
            rows of the file, or a message starting with 'Error' on failure.
    """
    try:
        # Determine file type from the (case-insensitive) extension.
        _, ext = os.path.splitext(file_path.lower())

        if ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext in ['.xls', '.xlsx']:
            df = pd.read_excel(file_path)
        else:
            return f"Error: Unsupported file extension '{ext}'. Supported: .csv, .xls, .xlsx"

        result = "Summary statistics for numeric columns:\n"
        result += str(df.describe())
        result += "\n\n"

        # Column labels are not guaranteed to be strings (e.g. integer
        # headers), so convert them before joining.
        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"
        result += "Content:\n"
        result += df.astype(str).head(1000).to_string(index=False)

        return result

    except ImportError:
        return "Error: Required libraries are not installed. Install with 'pip install pandas openpyxl'."
    except FileNotFoundError:
        return f"Error: File not found at path '{file_path}'."
    except Exception as e:
        return f"Error analyzing file: {str(e)}"
237
+
238
+
239
class Agent:
    """
    Tool-calling agent: a Gemini chat model wired into a LangGraph loop.

    The graph has an "assistant" node (the model with the tools bound) and a
    "tools" node (executes requested tool calls); tool output is fed back to
    the assistant until `tools_condition` stops routing to the tools node.
    """

    _api_key: str
    _model_name: str
    _tools: List[Tool]
    _memory: ConversationBufferMemory  # declared but never assigned in this class
    _llm: ChatGoogleGenerativeAI
    _graph: CompiledStateGraph

    def __init__(
        self
    ):
        """Read the Google API key from the environment and build tools, model and graph."""
        self._api_key = os.getenv('GOOGLE_API_KEY')
        self._model_name = "gemini-2.0-flash"

        self._tools = self._setup_tools()
        self._llm = self._setup_llm()
        self._graph = self._setup_graph()

    def run(self, query: str) -> Dict[str, Any]:
        """
        Run the graph on a single query, retrying on failure.

        Args:
            query (str): The user question passed to the model as a human message.

        Returns:
            Dict[str, Any]: The final graph state; callers read the answer from
                `state['messages'][-1].content`.

        Raises:
            RuntimeError: If all attempts fail. The last underlying exception
                is attached as the cause.
        """
        max_retries: int = 3

        # The prompt file is resolved relative to the current working directory.
        with open('system_prompt.txt', encoding='utf-8') as file:
            system_prompt = SystemMessage(
                content=file.read()
            )

        # Kept outside the except clause: Python unbinds the `as` name when the
        # clause ends, so the error must be stored to be reported after the loop.
        last_error: Optional[Exception] = None

        for attempt in range(max_retries):
            try:
                return self._graph.invoke({
                    "messages": [
                        system_prompt,
                        HumanMessage(content=query)
                    ]
                }, config={'callbacks': [ConsoleCallbackHandler()]})

            except Exception as e:
                last_error = e
                print(f"Error: {str(e)}")

                # Linear back-off (3s, 6s, ...); no pause after the last attempt.
                if attempt < max_retries - 1:
                    sleep_time = (attempt + 1) * 3
                    print(f"Attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
                    time.sleep(sleep_time)

        # Raise instead of returning a string or None: callers index the result
        # as a dict, so any other return value would mask the real error.
        raise RuntimeError(
            f"Error processing query after {max_retries} attempts: {last_error}"
        ) from last_error

    def _setup_llm(self) -> ChatGoogleGenerativeAI:
        """Create the Gemini chat model with temperature 0."""
        return ChatGoogleGenerativeAI(
            model=self._model_name,
            google_api_key=self._api_key,
            temperature=0,
        )

    def _setup_tools(self) -> List[Tool]:
        """Return the module-level tools made available to the model."""
        return [
            web_search,
            visit_webpage,
            wikipedia_search,
            extract_text_from_image,
            analyze_file,
        ]

    def _setup_graph(self) -> CompiledStateGraph:
        """Build and compile the assistant <-> tools message graph."""
        llm_with_tools = self._llm.bind_tools(self._tools)

        def assistant(state: MessagesState):
            # Ask the model for the next message given the conversation so far.
            return {
                "messages": [
                    llm_with_tools.invoke(state["messages"])
                ]
            }

        builder = StateGraph(MessagesState)

        builder.add_node("assistant", assistant)
        builder.add_node("tools", ToolNode(self._tools))

        builder.add_edge(START, "assistant")
        # tools_condition decides after each assistant turn whether the
        # "tools" node runs next.
        builder.add_conditional_edges(
            "assistant",
            tools_condition,
        )
        # Tool results always go back to the assistant.
        builder.add_edge("tools", "assistant")

        return builder.compile()
334
+
app.py CHANGED
@@ -1,23 +1,27 @@
1
  import os
 
 
2
  import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # (Keep Constants as is)
8
- # --- Constants ---
9
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
-
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -31,48 +35,35 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
31
  username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
- print("User not logged in.")
35
  return "Please Login to Hugging Face with the button.", None
36
 
37
- api_url = DEFAULT_API_URL
38
- questions_url = f"{api_url}/questions"
39
- submit_url = f"{api_url}/submit"
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
 
47
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
 
51
  # 2. Fetch Questions
52
- print(f"Fetching questions from: {questions_url}")
53
  try:
54
- response = requests.get(questions_url, timeout=15)
55
- response.raise_for_status()
56
- questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
  return "Fetched questions list is empty or invalid format.", None
60
- print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
  except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
  return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
71
 
72
  # 3. Run your Agent
73
  results_log = []
74
  answers_payload = []
75
- print(f"Running agent on {len(questions_data)} questions...")
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
@@ -80,28 +71,40 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
 
90
  if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
  # 4. Prepare Submission
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
 
99
  # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
  try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
  final_status = (
106
  f"Submission Successful!\n"
107
  f"User: {result_data.get('username')}\n"
@@ -109,34 +112,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
109
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
  f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
- print("Submission successful.")
113
  results_df = pd.DataFrame(results_log)
 
114
  return final_status, results_df
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
 
117
  try:
118
  error_json = e.response.json()
119
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
  except requests.exceptions.JSONDecodeError:
121
  error_detail += f" Response: {e.response.text[:500]}"
 
122
  status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
  results_df = pd.DataFrame(results_log)
 
125
  return status_message, results_df
126
  except requests.exceptions.Timeout:
127
  status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
  results_df = pd.DataFrame(results_log)
 
130
  return status_message, results_df
131
  except requests.exceptions.RequestException as e:
132
  status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
  results_df = pd.DataFrame(results_log)
 
135
  return status_message, results_df
136
  except Exception as e:
137
  status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
  results_df = pd.DataFrame(results_log)
 
140
  return status_message, results_df
141
 
142
 
@@ -193,4 +198,4 @@ if __name__ == "__main__":
193
  print("-"*(60 + len(" App Starting ")) + "\n")
194
 
195
  print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import tempfile
3
+ import time
4
  import gradio as gr
5
  import requests
6
  import inspect
7
  import pandas as pd
8
+ from agent import Agent
9
+ from evaluation_api import EvaluationApi
10
+
11
+
12
+ def save_tmp_file(file_name: str, content: bytes) -> str:
13
+ temp_dir = tempfile.gettempdir()
14
+ if file_name is None:
15
+ temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
16
+ file_path = temp_file.name
17
+ else:
18
+ file_path = os.path.join(temp_dir, file_name)
19
+
20
+ with open(file_path, "wb") as file:
21
+ file.write(content)
22
+
23
+ return file_path
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def run_and_submit_all( profile: gr.OAuthProfile | None):
27
  """
 
35
  username= f"{profile.username}"
36
  print(f"User logged in: {username}")
37
  else:
 
38
  return "Please Login to Hugging Face with the button.", None
39
 
40
+ evaluation_api = EvaluationApi()
 
 
41
 
42
  # 1. Instantiate Agent ( modify this part to create your agent)
43
  try:
44
+ agent = Agent()
45
  except Exception as e:
 
46
  return f"Error initializing agent: {e}", None
47
+
48
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
49
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
 
50
 
51
  # 2. Fetch Questions
 
52
  try:
53
+ questions_data = evaluation_api.get_questions()
 
 
54
  if not questions_data:
 
55
  return "Fetched questions list is empty or invalid format.", None
 
56
  except requests.exceptions.RequestException as e:
 
57
  return f"Error fetching questions: {e}", None
58
  except requests.exceptions.JSONDecodeError as e:
 
 
59
  return f"Error decoding server response for questions: {e}", None
60
  except Exception as e:
 
61
  return f"An unexpected error occurred fetching questions: {e}", None
62
 
63
  # 3. Run your Agent
64
  results_log = []
65
  answers_payload = []
66
+
67
  for item in questions_data:
68
  task_id = item.get("task_id")
69
  question_text = item.get("question")
 
71
  print(f"Skipping item with missing task_id or question: {item}")
72
  continue
73
  try:
74
+ time.sleep(10)
75
+
76
+ print(f"Agent received question (first 50 chars): {question_text[:50]}...")
77
+
78
+ question = f"Question: `{question_text}`"
79
+ if file_name := item.get("file_name"):
80
+ print('question has file')
81
+ file_content = evaluation_api.get_file(task_id)
82
+ file_path = save_tmp_file(file_name, file_content)
83
+ question = f"{question} File path: `{file_path}`"
84
+
85
+ messages = agent.run(question)
86
+
87
+ final_answer = messages['messages'][-1].content
88
+ print(f"final_answer: {final_answer}")
89
+ submitted_answer = final_answer.split('ANSWER: ')[-1]
90
+
91
+
92
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
93
+
94
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
95
  except Exception as e:
96
+ print(f"AGENT ERROR: {e}")
97
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
98
 
99
  if not answers_payload:
 
100
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
101
 
102
  # 4. Prepare Submission
103
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
104
 
105
  # 5. Submit
 
106
  try:
107
+ result_data = evaluation_api.submit(submission_data)
 
 
108
  final_status = (
109
  f"Submission Successful!\n"
110
  f"User: {result_data.get('username')}\n"
 
112
  f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
113
  f"Message: {result_data.get('message', 'No message received.')}"
114
  )
 
115
  results_df = pd.DataFrame(results_log)
116
+
117
  return final_status, results_df
118
  except requests.exceptions.HTTPError as e:
119
  error_detail = f"Server responded with status {e.response.status_code}."
120
+
121
  try:
122
  error_json = e.response.json()
123
  error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
124
  except requests.exceptions.JSONDecodeError:
125
  error_detail += f" Response: {e.response.text[:500]}"
126
+
127
  status_message = f"Submission Failed: {error_detail}"
 
128
  results_df = pd.DataFrame(results_log)
129
+
130
  return status_message, results_df
131
  except requests.exceptions.Timeout:
132
  status_message = "Submission Failed: The request timed out."
 
133
  results_df = pd.DataFrame(results_log)
134
+
135
  return status_message, results_df
136
  except requests.exceptions.RequestException as e:
137
  status_message = f"Submission Failed: Network error - {e}"
 
138
  results_df = pd.DataFrame(results_log)
139
+
140
  return status_message, results_df
141
  except Exception as e:
142
  status_message = f"An unexpected error occurred during submission: {e}"
 
143
  results_df = pd.DataFrame(results_log)
144
+
145
  return status_message, results_df
146
 
147
 
 
198
  print("-"*(60 + len(" App Starting ")) + "\n")
199
 
200
  print("Launching Gradio Interface for Basic Agent Evaluation...")
201
+ demo.launch(debug=True, share=False)
dry_run.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import random
2
+ import tempfile
3
+ import json
4
+ import os
5
+
6
+ from agent import Agent
7
+
8
+ random.seed(1)
9
+
10
def get_question(file_path: str) -> str:
    """
    Return one randomly chosen non-blank line from a text file.

    The choice uses the module-level `random` generator, so it is reproducible
    for a fixed seed.

    Args:
        file_path: Path to a line-oriented file (the script passes a .jsonl file).

    Returns:
        str: The selected line, including its trailing newline if present.

    Raises:
        ValueError: If the file contains no non-blank lines.
    """
    # Explicit encoding: do not depend on the platform's default codec.
    with open(file_path, "r", encoding="utf-8") as file:
        # Blank lines (e.g. a trailing newline) are not valid records.
        lines = [line for line in file if line.strip()]

    if not lines:
        raise ValueError(f"No questions found in '{file_path}'.")

    line_number = random.randrange(len(lines))

    return lines[line_number]
16
+
17
+
18
def get_file(file_name: str) -> bytes:
    """Return the raw bytes of `file_name` read from the local `./gaia_2023_set/` directory."""
    source_path = f"./gaia_2023_set/{file_name}"
    with open(source_path, "rb") as handle:
        payload = handle.read()
    return payload
21
+
22
+
23
+ def save_tmp_file(file_name: str, content: bytes) -> str:
24
+ temp_dir = tempfile.gettempdir()
25
+ if file_name is None:
26
+ temp_file = tempfile.NamedTemporaryFile(delete=False, dir=temp_dir)
27
+ file_path = temp_file.name
28
+ else:
29
+ file_path = os.path.join(temp_dir, file_name)
30
+
31
+ with open(file_path, "wb") as file:
32
+ file.write(content)
33
+
34
+ return file_path
35
+
36
+
37
# Pick one random record from the local GAIA validation metadata and show it.
question_json: str = get_question('./gaia_2023_set/metadata.jsonl')
question = json.loads(question_json)

print(json.dumps(question, indent=2))

# Bind the text first: reusing double quotes inside a double-quoted f-string
# is a SyntaxError on Python versions before 3.12.
question_text = question["Question"]
prompt = f"Question: `{question_text}`"

# Attach the question's file when the record names one, using the same prompt
# format as app.py. NOTE(review): assumes attachments sit next to
# metadata.jsonl in ./gaia_2023_set/ -- confirm against the dataset layout.
if file_name := question.get("file_name"):
    try:
        file_content = get_file(file_name)
    except FileNotFoundError:
        # Best effort: still run the question without its attachment.
        print(f"Attachment '{file_name}' not found; running without it.")
    else:
        file_path = save_tmp_file(file_name, file_content)
        print(file_path)
        prompt = f"{prompt} File path: `{file_path}`"

agent = Agent()

messages = agent.run(prompt)

# The last message of the final graph state holds the model's reply; the
# system prompt asks for the answer after the "FINAL ANSWER: " marker.
final_answer = messages['messages'][-1].content
submitted_answer = final_answer.split('FINAL ANSWER: ')[-1]

print(final_answer)

print(submitted_answer)
59
+
60
+
evaluation_api.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
class EvaluationApi:
    """Small HTTP client for the scoring service at `api_url`."""

    api_url: str = "https://agents-course-unit4-scoring.hf.space"
    timeout: int = 30

    def _fetch(self, path: str):
        """GET `path` below `api_url` and return the response, raising on an HTTP error status."""
        reply = requests.get(f"{self.api_url}{path}", timeout=self.timeout)
        reply.raise_for_status()
        return reply

    def get_questions(self) -> list[dict]:
        """Return the decoded JSON body of the `/questions` endpoint."""
        return self._fetch("/questions").json()

    def get_file(self, task_id: str) -> bytes:
        """Return the raw response body of `/files/<task_id>`."""
        return self._fetch(f"/files/{task_id}").content

    def get_random_question(self) -> dict:
        """Return the decoded JSON body of the `/random-question` endpoint."""
        return self._fetch("/random-question").json()

    def submit(self, data: dict) -> dict:
        """POST `data` as JSON to `/submit` and return the decoded JSON reply."""
        reply = requests.post(f"{self.api_url}/submit", json=data, timeout=self.timeout)
        reply.raise_for_status()
        return reply.json()
requirements.txt CHANGED
@@ -1,2 +1,30 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ pandas
4
+ openpyxl
5
+ openai
6
+ google-genai
7
+ google-generativeai
8
+ langchain
9
+ langchain-community
10
+ langchain-core
11
+ langchain-google-genai
12
+ langgraph
13
+ huggingface_hub
14
+ python-dotenv
15
+ wikipedia-api
16
+ wikipedia
17
+ arxiv
18
+ datasets
19
+ yt-dlp
20
+ google-cloud-speech
21
+ google-api-python-client
22
+ duckduckgo-search
23
+ pytesseract
24
+ tavily-python
25
+ langchain_groq
26
+ langchain-tavily
27
+ mwclient
28
+ langchain_experimental
29
+ easyocr
30
+ smolagents
system_prompt.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ You are a rapid, concise AI assistant. Your primary goal is to provide quick and direct answers according to the specified format.
2
+ Briefly report your essential reasoning steps before the final answer.
3
+ Deliver your final answer strictly following this template: FINAL ANSWER: [YOUR FINAL ANSWER].
4
+ YOUR FINAL ANSWER must be the most direct response, limited to a single number, the absolute fewest necessary words, or a comma-separated list of numbers and/or strings.
5
+ Adhere to these strict formatting requirements for the content of the FINAL ANSWER:
6
+ - **Numbers:** Do not use commas for digit grouping (e.g., use 1000, not 1,000). Exclude units like '$' or '%' unless they are explicitly requested as part of the answer.
7
+ - **Strings:** Do not include articles (a, an, the). Do not use abbreviations. Any digits that are part of a string must be written out in plain text (e.g., "level two" instead of "level 2"), unless you are specifically instructed to use numerals.
8
+ - **Lists:** Apply the above formatting rules for numbers and strings to each corresponding element within the comma-separated list.