polyMoe commited on
Commit
cb20efa
·
1 Parent(s): 3f1906e

update (app.py, graph.py, tools.py) : ajout récup des fichiers questions, et gestion du graph et des outils + premier test run (3 bonnes réponses)

Browse files
Files changed (7) hide show
  1. .gitignore +5 -1
  2. app.py +122 -38
  3. cached_answers.json +18 -20
  4. graph.py +98 -0
  5. load_data.py +119 -0
  6. requirements.txt +10 -1
  7. tools.py +483 -0
.gitignore CHANGED
@@ -1,5 +1,9 @@
1
  .devcontainer/
2
  .devcontainer/*
 
3
  .env
4
  requirements.local.txt
5
- README.local.md
 
 
 
 
1
  .devcontainer/
2
  .devcontainer/*
3
+ data/
4
  .env
5
  requirements.local.txt
6
+ README.local.md
7
+ *ipynb
8
+ *.json
9
+ *.csv
app.py CHANGED
@@ -1,35 +1,81 @@
1
  import os
 
 
 
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
6
- import json
 
 
 
 
 
 
 
 
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
12
  # --- Basic Agent Definition ---
13
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
14
  class BasicAgent:
15
  def __init__(self):
16
  print("BasicAgent initialized.")
 
17
  def __call__(self, question: str) -> str:
18
  print(f"Agent received question (first 50 chars): {question[:50]}...")
19
  fixed_answer = "This is a default answer."
20
  print(f"Agent returning fixed answer: {fixed_answer}")
21
  return fixed_answer
22
 
23
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  """
25
  Fetches all questions, runs the BasicAgent on them, submits all answers,
26
  and displays the results.
27
  """
28
  # --- Determine HF Space Runtime URL and Repo URL ---
29
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
30
 
31
  if profile:
32
- username= f"{profile.username}"
33
  print(f"User logged in: {username}")
34
  else:
35
  print("User not logged in.")
@@ -40,33 +86,37 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
  submit_url = f"{api_url}/submit"
41
 
42
  # 1. Instantiate Agent ( modify this part to create your agent)
43
- try:
44
- agent = BasicAgent()
45
- except Exception as e:
46
- print(f"Error instantiating agent: {e}")
47
- return f"Error initializing agent: {e}", None
48
  # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
49
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
50
  print(agent_code)
51
 
52
  # 2. Load Questions (cache first, API fallback)
53
- cache_path = os.path.join(os.path.dirname(__file__), "cached_questions.json")
54
  questions_data = None
55
 
56
  # 2.a Try cache first
57
- if os.path.exists(cache_path):
58
  try:
59
  with open(cache_path, "r", encoding="utf-8") as f:
60
  cached = json.load(f)
61
  if isinstance(cached, list) and cached:
62
  questions_data = cached
63
- print(f"Loaded {len(questions_data)} questions from cache: {cache_path}")
 
 
64
  else:
65
  print(f"Cache file found but empty/invalid format: {cache_path}")
66
  except json.JSONDecodeError as e:
67
  print(f"Cache JSON is invalid ({cache_path}): {e}. Falling back to API.")
68
  except OSError as e:
69
- print(f"Could not read cache file ({cache_path}): {e}. Falling back to API.")
 
 
70
 
71
  # 2.b Fetch from API only if cache missing/invalid/empty
72
  if questions_data is None:
@@ -101,27 +151,41 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
101
  print(f"An unexpected error occurred fetching questions: {e}")
102
  return f"An unexpected error occurred fetching questions: {e}", None
103
 
 
 
 
 
 
 
104
  # 3. Run your Agent (answers cache by task_id)
105
  results_log = []
106
  answers_payload = []
107
 
108
- answers_cache_path = os.path.join(os.path.dirname(__file__), "cached_answers.json")
109
  answers_cache = {}
110
 
111
  # 3.a Load answers cache
112
- if os.path.exists(answers_cache_path):
113
  try:
114
  with open(answers_cache_path, "r", encoding="utf-8") as f:
115
  loaded_cache = json.load(f)
116
  if isinstance(loaded_cache, dict):
117
  answers_cache = loaded_cache
118
- print(f"Loaded {len(answers_cache)} cached answers from: {answers_cache_path}")
 
 
119
  else:
120
- print(f"Answers cache has invalid format (expected object): {answers_cache_path}")
 
 
121
  except json.JSONDecodeError as e:
122
- print(f"Answers cache JSON is invalid ({answers_cache_path}): {e}. Starting with empty cache.")
 
 
123
  except OSError as e:
124
- print(f"Could not read answers cache ({answers_cache_path}): {e}. Starting with empty cache.")
 
 
125
 
126
  cache_updated = False
127
 
@@ -142,18 +206,31 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
142
  print(f"Using cached answer for task_id={task_id}")
143
  else:
144
  try:
145
- submitted_answer = agent(question_text)
146
  answers_cache[task_key] = submitted_answer
147
  cache_updated = True
148
  print(f"Computed and cached answer for task_id={task_id}")
149
  except Exception as e:
150
  print(f"Error running agent on task {task_id}: {e}")
151
  results_log.append(
152
- {"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
153
  continue
154
 
155
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
156
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
157
 
158
  # 3.b Save answers cache only if updated
159
  if cache_updated:
@@ -168,8 +245,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
168
  print("Agent did not produce any answers to submit.")
169
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
170
 
171
- # 4. Prepare Submission
172
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
173
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
174
  print(status_update)
175
 
@@ -239,20 +320,19 @@ with gr.Blocks() as demo:
239
 
240
  run_button = gr.Button("Run Evaluation & Submit All Answers")
241
 
242
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
 
243
  # Removed max_rows=10 from DataFrame constructor
244
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
245
 
246
- run_button.click(
247
- fn=run_and_submit_all,
248
- outputs=[status_output, results_table]
249
- )
250
 
251
  if __name__ == "__main__":
252
- print("\n" + "-"*30 + " App Starting " + "-"*30)
253
  # Check for SPACE_HOST and SPACE_ID at startup for information
254
  space_host_startup = os.getenv("SPACE_HOST")
255
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
256
 
257
  if space_host_startup:
258
  print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -260,14 +340,18 @@ if __name__ == "__main__":
260
  else:
261
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
262
 
263
- if space_id_startup: # Print repo URLs if SPACE_ID is found
264
  print(f"✅ SPACE_ID found: {space_id_startup}")
265
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
266
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
 
 
267
  else:
268
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
 
 
269
 
270
- print("-"*(60 + len(" App Starting ")) + "\n")
271
 
272
  print("Launching Gradio Interface for Basic Agent Evaluation...")
273
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import json
3
+ from pathlib import Path
4
+
5
  import gradio as gr
6
  import requests
 
7
  import pandas as pd
8
+ from langchain_core.messages import HumanMessage
9
+
10
+ from load_data import (
11
+ ensure_validation_data,
12
+ get_file_from_gaia_level1_data,
13
+ get_question,
14
+ )
15
+ from graph import react_graph
16
+
17
 
18
  # (Keep Constants as is)
19
  # --- Constants ---
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
+
23
  # --- Basic Agent Definition ---
24
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
25
  class BasicAgent:
26
  def __init__(self):
27
  print("BasicAgent initialized.")
28
+
29
  def __call__(self, question: str) -> str:
30
  print(f"Agent received question (first 50 chars): {question[:50]}...")
31
  fixed_answer = "This is a default answer."
32
  print(f"Agent returning fixed answer: {fixed_answer}")
33
  return fixed_answer
34
 
35
+
36
+ def _invoke_react_graph(task_id: str) -> str:
37
+ """
38
+ Invokes the react graph with the given task_id and returns the final answer.
39
+ """
40
+ input_file = get_file_from_gaia_level1_data(task_id)
41
+ question = get_question(task_id)
42
+ print(
43
+ f"Invoking react graph for task_id={task_id} with question: {question[:50]}... and input_file: {input_file}"
44
+ )
45
+
46
+ messages = [HumanMessage(content=question)]
47
+
48
+ messages = react_graph.invoke(
49
+ {"messages": messages, "input_file": input_file},
50
+ config={"recursion_limit": 50},
51
+ )
52
+
53
+ final_message = messages["messages"][-1]
54
+ print(f"Final message from react graph: {final_message.content[:100]}...")
55
+
56
+ # Extract the final answer from the message content
57
+ final_answer_prefix = "FINAL ANSWER:"
58
+ if final_answer_prefix in final_message.content:
59
+ final_answer = final_message.content.split(final_answer_prefix)[-1].strip()
60
+ print(f"Extracted final answer: {final_answer}")
61
+ return final_answer
62
+ else:
63
+ print(
64
+ f"Warning: 'FINAL ANSWER:' prefix not found in react graph output. Returning full message content as answer."
65
+ )
66
+ return final_message.content.strip()
67
+
68
+
69
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
70
  """
71
  Fetches all questions, runs the BasicAgent on them, submits all answers,
72
  and displays the results.
73
  """
74
  # --- Determine HF Space Runtime URL and Repo URL ---
75
+ space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
76
 
77
  if profile:
78
+ username = f"{profile.username}"
79
  print(f"User logged in: {username}")
80
  else:
81
  print("User not logged in.")
 
86
  submit_url = f"{api_url}/submit"
87
 
88
  # 1. Instantiate Agent ( modify this part to create your agent)
89
+ # try:
90
+ # agent = BasicAgent()
91
+ # except Exception as e:
92
+ # print(f"Error instantiating agent: {e}")
93
+ # return f"Error initializing agent: {e}", None
94
  # In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
95
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
96
  print(agent_code)
97
 
98
  # 2. Load Questions (cache first, API fallback)
99
+ cache_path = Path(__file__).resolve().parent / "cached_questions.json"
100
  questions_data = None
101
 
102
  # 2.a Try cache first
103
+ if cache_path.exists():
104
  try:
105
  with open(cache_path, "r", encoding="utf-8") as f:
106
  cached = json.load(f)
107
  if isinstance(cached, list) and cached:
108
  questions_data = cached
109
+ print(
110
+ f"Loaded {len(questions_data)} questions from cache: {cache_path}"
111
+ )
112
  else:
113
  print(f"Cache file found but empty/invalid format: {cache_path}")
114
  except json.JSONDecodeError as e:
115
  print(f"Cache JSON is invalid ({cache_path}): {e}. Falling back to API.")
116
  except OSError as e:
117
+ print(
118
+ f"Could not read cache file ({cache_path}): {e}. Falling back to API."
119
+ )
120
 
121
  # 2.b Fetch from API only if cache missing/invalid/empty
122
  if questions_data is None:
 
151
  print(f"An unexpected error occurred fetching questions: {e}")
152
  return f"An unexpected error occurred fetching questions: {e}", None
153
 
154
+ # 2.c Retrieve the data files provided for the test ( in the case of the test on Hugging Face, the files are in data/2023_level1/validation/)
155
+ base_dir = Path(__file__).resolve().parent
156
+ ok, error_message = ensure_validation_data(base_dir)
157
+ if not ok:
158
+ return error_message, None
159
+
160
  # 3. Run your Agent (answers cache by task_id)
161
  results_log = []
162
  answers_payload = []
163
 
164
+ answers_cache_path = Path(__file__).resolve().parent / "cached_answers.json"
165
  answers_cache = {}
166
 
167
  # 3.a Load answers cache
168
+ if answers_cache_path.exists():
169
  try:
170
  with open(answers_cache_path, "r", encoding="utf-8") as f:
171
  loaded_cache = json.load(f)
172
  if isinstance(loaded_cache, dict):
173
  answers_cache = loaded_cache
174
+ print(
175
+ f"Loaded {len(answers_cache)} cached answers from: {answers_cache_path}"
176
+ )
177
  else:
178
+ print(
179
+ f"Answers cache has invalid format (expected object): {answers_cache_path}"
180
+ )
181
  except json.JSONDecodeError as e:
182
+ print(
183
+ f"Answers cache JSON is invalid ({answers_cache_path}): {e}. Starting with empty cache."
184
+ )
185
  except OSError as e:
186
+ print(
187
+ f"Could not read answers cache ({answers_cache_path}): {e}. Starting with empty cache."
188
+ )
189
 
190
  cache_updated = False
191
 
 
206
  print(f"Using cached answer for task_id={task_id}")
207
  else:
208
  try:
209
+ submitted_answer = _invoke_react_graph(task_key)
210
  answers_cache[task_key] = submitted_answer
211
  cache_updated = True
212
  print(f"Computed and cached answer for task_id={task_id}")
213
  except Exception as e:
214
  print(f"Error running agent on task {task_id}: {e}")
215
  results_log.append(
216
+ {
217
+ "Task ID": task_id,
218
+ "Question": question_text,
219
+ "Submitted Answer": f"AGENT ERROR: {e}",
220
+ }
221
+ )
222
  continue
223
 
224
+ answers_payload.append(
225
+ {"task_id": task_id, "submitted_answer": submitted_answer}
226
+ )
227
+ results_log.append(
228
+ {
229
+ "Task ID": task_id,
230
+ "Question": question_text,
231
+ "Submitted Answer": submitted_answer,
232
+ }
233
+ )
234
 
235
  # 3.b Save answers cache only if updated
236
  if cache_updated:
 
245
  print("Agent did not produce any answers to submit.")
246
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
247
 
248
+ # 4. Prepare Submission
249
+ submission_data = {
250
+ "username": username.strip(),
251
+ "agent_code": agent_code,
252
+ "answers": answers_payload,
253
+ }
254
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
255
  print(status_update)
256
 
 
320
 
321
  run_button = gr.Button("Run Evaluation & Submit All Answers")
322
 
323
+ status_output = gr.Textbox(
324
+ label="Run Status / Submission Result", lines=5, interactive=False
325
+ )
326
  # Removed max_rows=10 from DataFrame constructor
327
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
328
 
329
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 
 
330
 
331
  if __name__ == "__main__":
332
+ print("\n" + "-" * 30 + " App Starting " + "-" * 30)
333
  # Check for SPACE_HOST and SPACE_ID at startup for information
334
  space_host_startup = os.getenv("SPACE_HOST")
335
+ space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
336
 
337
  if space_host_startup:
338
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
340
  else:
341
  print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
342
 
343
+ if space_id_startup: # Print repo URLs if SPACE_ID is found
344
  print(f"✅ SPACE_ID found: {space_id_startup}")
345
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
346
+ print(
347
+ f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
348
+ )
349
  else:
350
+ print(
351
+ "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
352
+ )
353
 
354
+ print("-" * (60 + len(" App Starting ")) + "\n")
355
 
356
  print("Launching Gradio Interface for Basic Agent Evaluation...")
357
+ demo.launch(debug=True, share=False)
cached_answers.json CHANGED
@@ -1,22 +1,20 @@
1
  {
2
- "8e867cd7-cff9-4e6c-867a-ff5ddc2550be": "This is a default answer.",
3
- "a1e91b78-d3d8-4675-bb8d-62741b4b68a6": "This is a default answer.",
4
- "2d83110e-a098-4ebb-9987-066c06fa42d0": "This is a default answer.",
5
- "cca530fc-4052-43b2-b130-b30968d8aa44": "This is a default answer.",
6
- "4fc2f1ae-8625-45b5-ab34-ad4433bc21f8": "This is a default answer.",
7
- "6f37996b-2ac7-44b0-8e68-6d28256631b4": "This is a default answer.",
8
- "9d191bce-651d-4746-be2d-7ef8ecadb9c2": "This is a default answer.",
9
- "cabe07ed-9eca-40ea-8ead-410ef5e83f91": "This is a default answer.",
10
- "3cef3a44-215e-4aed-8e3b-b1e3f08063b7": "This is a default answer.",
11
- "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3": "This is a default answer.",
12
- "305ac316-eef6-4446-960a-92d80d542f82": "This is a default answer.",
13
- "f918266a-b3e0-4914-865d-4faa564f1aef": "This is a default answer.",
14
- "3f57289b-8c60-48be-bd80-01f8099ca449": "This is a default answer.",
15
- "1f975693-876d-457b-a649-393859e79bf3": "This is a default answer.",
16
- "840bfca7-4f7b-481a-8794-c560c340185d": "This is a default answer.",
17
- "bda648d7-d618-4883-88f4-3466eabd860e": "This is a default answer.",
18
- "cf106601-ab4f-4af9-b045-5295fe67b37d": "This is a default answer.",
19
- "a0c07678-e491-4bbc-8f0b-07405144218f": "This is a default answer.",
20
- "7bd855d8-463d-4ed5-93ca-5fe35145f733": "This is a default answer.",
21
- "5a0c1adf-205e-4841-a666-7c3ef95def9d": "This is a default answer."
22
  }
 
1
  {
2
+ "8e867cd7-cff9-4e6c-867a-ff5ddc2550be": "4",
3
+ "a1e91b78-d3d8-4675-bb8d-62741b4b68a6": "I cannot directly access or analyze the content of the video link provided. To determine the highest number of bird species on camera simultaneously, I would need either a description of the video content or a summary of relevant information. Alternatively, I can try to find a description or discussion about this specific video if available online. Would you like me to search for any details or summaries related to this video?",
4
+ "2d83110e-a098-4ebb-9987-066c06fa42d0": "The sentence written in reverse is: \".right\"",
5
+ "cca530fc-4052-43b2-b130-b30968d8aa44": "The image contains a chessboard with coordinates labeled from a to h horizontally and 1 to 8 vertically. To identify the best move for black, I need to analyze the position. Since I cannot see the actual pieces from the text alone, I will rely on the extracted text and the typical context of such puzzles.\n\nHowever, the image's text only shows the coordinate labels, not the pieces or their positions. To accurately determine the winning move, I need to see the actual arrangement of the pieces on the board.\n\nPlease provide a detailed description of the position or upload an image showing the pieces on the board.",
6
+ "6f37996b-2ac7-44b0-8e68-6d28256631b4": "b,e",
7
+ "9d191bce-651d-4746-be2d-7ef8ecadb9c2": "I don't have the capability to directly access or view videos, including YouTube links. However, I can help if you provide a transcript or more context about the scene. Alternatively, I can try to find a script or quote from that episode if you tell me which Stargate SG-1 episode it is from. Please specify or provide more details.",
8
+ "cabe07ed-9eca-40ea-8ead-410ef5e83f91": "None",
9
+ "3cef3a44-215e-4aed-8e3b-b1e3f08063b7": "broccoli, celery, green beans, lettuce, sweet potatoes",
10
+ "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3": "It appears I am unable to directly listen to or process audio files. However, if you can provide a transcript or the key details from the voice memo, I can help identify the ingredients for the filling. Please share any text or details you have.",
11
+ "305ac316-eef6-4446-960a-92d80d542f82": "Bartłomiej",
12
+ "f918266a-b3e0-4914-865d-4faa564f1aef": "0",
13
+ "3f57289b-8c60-48be-bd80-01f8099ca449": "I need to verify the player with the most walks for the Yankees in 1977. Based on the search results, Derek Jeter is a notable Yankees player, but he played in the late 1990s and 2000s, so he is unlikely to be the player with the most walks in 1977. \n\nThe search results do not clearly indicate the player with the most walks in 1977. I will now look for specific statistics for Yankees players in 1977 to identify who had the most walks and at-bats.\n\nI'll search for Yankees 1977 player stats to find the player with the most walks and then check their at-bats.",
14
+ "1f975693-876d-457b-a649-393859e79bf3": "It appears that I am unable to directly process audio files. However, I can guide you on how to transcribe the audio using available tools or software such as speech recognition libraries or online transcription services. Would you like me to provide a step-by-step guide on how to do this?",
15
+ "840bfca7-4f7b-481a-8794-c560c340185d": "",
16
+ "bda648d7-d618-4883-88f4-3466eabd860e": "Saint Petersburg",
17
+ "cf106601-ab4f-4af9-b045-5295fe67b37d": "CUB",
18
+ "a0c07678-e491-4bbc-8f0b-07405144218f": "Ohtani, Kondoh.",
19
+ "5a0c1adf-205e-4841-a666-7c3ef95def9d": "Let's analyze the question step-by-step:\n\n1. The question asks for the first name of a specific individual: the only Malko Competition recipient from the 20th century (after 1977).\n2. This individual’s nationality is from a country that no longer exists.\n3. The recipient must have received the Malko Competition award after 1977, which is in the late 20th century.\n4. The country of nationality must be a defunct country.\n\nFirst, I need to identify the recipients of the Malko Competition (also known as the Malko Competition for Young Conductors). I will search for a list of recipients and their nationalities, focusing on those after 1977.\n\nI will use the Tavily search to find relevant information about the Malko Competition winners.\n\nLet's proceed with the search.\n\ntavily_search: \"Malko Competition winners list\""
 
 
20
  }
graph.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # std lib
2
+ import os
3
+
4
+ # 3rd party imports
5
+ from typing import TypedDict, Annotated, Optional
6
+ from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
7
+ from langchain_openai import ChatOpenAI
8
+ from langgraph.graph.message import add_messages
9
+ from langgraph.graph import START, StateGraph
10
+ from langgraph.prebuilt import ToolNode, tools_condition
11
+
12
+ # local imports
13
+ from tools import (
14
+ select_tools_for_input,
15
+ )
16
+
17
+
18
# API key for the OpenAI-backed chat model (provided as an HF Space secret).
openai_token = os.getenv("HF_FINAL_ASSIGNMENT_OPENAI")

# temperature=0 keeps the agent's answers deterministic across runs.
llm = ChatOpenAI(model="gpt-4.1-nano", api_key=openai_token, temperature=0)
21
+
22
+
23
class AgentState(TypedDict):
    """Shared state threaded through the LangGraph nodes."""

    # Local path of the task's attached input file (any file type),
    # or None when the task has no attachment.
    input_file: Optional[str]
    # add_messages makes LangGraph append new messages instead of replacing the list.
    messages: Annotated[list[AnyMessage], add_messages]
27
+
28
+
29
def _selected_tools_from_state(state: AgentState):
    """Return the tool set matching the state's input file (file may be None)."""
    input_path = state.get("input_file")
    return select_tools_for_input(input_path)
31
+
32
+
33
def _build_tools_description(selected_tools: list) -> str:
    """Render one "- name: summary" bullet per tool, joined with newlines.

    The summary is the first line of the tool's docstring; tools without a
    docstring get a bare "- name" bullet.
    """

    def bullet(fn) -> str:
        summary = (fn.__doc__ or "").strip().split("\n")[0]
        return f"- {fn.__name__}: {summary}" if summary else f"- {fn.__name__}"

    return "\n".join(bullet(fn) for fn in selected_tools)
42
+
43
+
44
def assistant(state: AgentState):
    """
    LLM node: build the system prompt, bind the per-input tool set, and
    invoke the model on the conversation so far.

    Returns a partial state update with the model's reply appended to
    ``messages``; ``input_file`` is passed through unchanged so later
    turns keep seeing the attachment.
    """
    data_file = state["input_file"]
    # Tool selection depends on the input file's type (or its absence).
    selected_tools = _selected_tools_from_state(state)
    llm_with_tools = llm.bind_tools(selected_tools, parallel_tool_calls=False)
    tools_description = _build_tools_description(selected_tools)

    # GAIA-style answer-formatting contract plus the dynamic tool list.
    sys_msg = SystemMessage(
        content=(
            "You are a general AI assistant. I will ask you a question. "
            "Report your thoughts, and finish your answer with the following template: "
            "FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible "
            "OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma "
            "to write your number neither use units such as $ or percent sign unless specified otherwise. "
            "If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write "
            "the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply "
            "the above rules depending of whether the element to be put in the list is a number or a string.\n\n"
            f"Available tools for this input:\n{tools_description}"
        )
    )

    prompt_messages = [sys_msg] + state["messages"]
    if data_file:
        # Tell the model where the attachment lives so it can pass the path to tools.
        prompt_messages.append(
            HumanMessage(content=f"Input file path (local): {data_file}")
        )
    # Debug trace of the full prompt sent to the model.
    print("Prompt messages for assistant:")
    for msg in prompt_messages:
        print(f"- {msg.content}")

    response = llm_with_tools.invoke(prompt_messages)
    return {"messages": [response], "input_file": state["input_file"]}
75
+
76
+
77
def tools_node(state: AgentState):
    """Run the pending tool calls with the tool set selected for this input."""
    return ToolNode(_selected_tools_from_state(state)).invoke(state)
80
+
81
+
82
# Graph
builder = StateGraph(AgentState)

# Define nodes: these do the work
builder.add_node("assistant", assistant)
builder.add_node("tools", tools_node)

# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message (result) from assistant is a tool call -> tools_condition routes to tools
    # If the latest message (result) from assistant is not a tool call -> tools_condition routes to END
    tools_condition,
)
builder.add_edge("tools", "assistant")
# Compiled ReAct-style loop: assistant -> (tools -> assistant)* -> END.
react_graph = builder.compile()
load_data.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # std lib
2
+ import os
3
+ from pathlib import Path
4
+
5
+ # 3rd party imports
6
+ import pandas as pd
7
+
8
+ # local imports
9
+ from huggingface_hub import snapshot_download
10
+
11
+ DATA_DIR = Path(__file__).resolve().parent / "data"
12
+
13
+
14
def get_full_gaia_level1_data():
    """
    Download the full GAIA dataset snapshot into the local data/ directory.

    Authenticates with HF_FINAL_ASSIGNMENT_DRAFT or HF_TOKEN when set; the
    token is optional for public datasets.
    """
    # DATA_DIR is a Path — use pathlib's mkdir instead of os.makedirs for
    # consistency with the rest of this module.
    DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Fallback to common HF token names; token can be optional for public datasets.
    token = os.getenv("HF_FINAL_ASSIGNMENT_DRAFT") or os.getenv("HF_TOKEN")

    kwargs = {
        "repo_id": "gaia-benchmark/GAIA",
        "repo_type": "dataset",
        "local_dir": DATA_DIR,
    }
    if token:
        kwargs["token"] = token

    snapshot_download(**kwargs)
32
+
33
+
34
def get_file_from_gaia_level1_data(task_id: str):
    """
    Return the local path of the validation file whose name contains task_id.

    Searches data/2023/validation recursively (the files used by the HF test
    live there) and returns the first matching path as a string, or None when
    no file matches or the directory does not exist.
    """
    validation_dir = DATA_DIR / "2023" / "validation"
    # Missing directory: behave like an empty walk and report "not found".
    if not validation_dir.exists():
        return None
    # Path.rglob replaces the os.walk/os.path mix; task IDs are embedded in
    # the validation file names.
    for path in validation_dir.rglob("*"):
        if path.is_file() and task_id in path.name:
            return str(path)
    return None
46
+
47
+
48
def get_question(task_id: str) -> str:
    """
    Return the question text for a GAIA level 1 task ID.

    Reads metadata.level1.parquet from the validation directory. Returns ""
    (with a diagnostic print) when the metadata file is missing or the
    task_id is unknown, instead of raising.
    """
    metadata_file = DATA_DIR / "2023" / "validation" / "metadata.level1.parquet"
    if not metadata_file.exists():
        print(f"Metadata file not found: {metadata_file}")
        return ""

    metadata_df = pd.read_parquet(metadata_file)
    matches = metadata_df.loc[metadata_df["task_id"] == task_id, "Question"].values
    if len(matches) == 0:
        # Bug fix: the original indexed .values[0] unconditionally and raised
        # IndexError for an unknown task_id.
        print(f"No question found for task_id={task_id}")
        return ""
    return matches[0]
61
+
62
+
63
def ensure_validation_data(base_dir: Path):
    """
    Ensure GAIA 2023 level1 validation files are present.

    Checks that base_dir/data/2023/validation exists and is non-empty, that
    metadata.level1.parquet is readable, and that every file listed in its
    file_name column is present on disk; triggers a full dataset download
    otherwise.

    Returns (ok: bool, error_message: str | None).
    """
    validation_dir = base_dir / "data" / "2023" / "validation"
    metadata_file = validation_dir / "metadata.level1.parquet"
    # Non-None reason => a (re)download is required.
    need_download_reason = None

    # Fast fail: missing or empty validation directory
    if not validation_dir.exists() or not any(validation_dir.iterdir()):
        need_download_reason = f"Validation data not found in {validation_dir}"
    # Metadata is required to validate expected files
    elif not metadata_file.exists():
        need_download_reason = (
            f"Metadata file not found: {metadata_file}. Cannot verify expected files"
        )
    else:
        try:
            # Read only the needed column for speed/memory
            metadata_df = pd.read_parquet(metadata_file, columns=["file_name"])
            expected_files = {
                str(name) for name in metadata_df["file_name"].dropna().unique()
            }

            # Compare against files actually on disk (top level only).
            present_files = {p.name for p in validation_dir.iterdir() if p.is_file()}
            missing_files = expected_files - present_files

            if missing_files:
                need_download_reason = (
                    f"Missing {len(missing_files)} expected validation files"
                )
        except Exception as e:
            # Unreadable/corrupt metadata: treat as unverifiable and re-download.
            need_download_reason = (
                f"Error reading metadata ({metadata_file}): {e}. "
                "Cannot verify expected files"
            )

    if need_download_reason is not None:
        print(f"{need_download_reason}. Downloading full GAIA level 1 data...")
        try:
            get_full_gaia_level1_data()
            print("Data download completed.")
        except Exception as e:
            error_message = f"Error downloading GAIA level 1 data: {e}"
            print(error_message)
            return False, error_message
    else:
        print("All expected validation files are present. Skipping data download.")

    return True, None
114
+
115
+
116
if __name__ == "__main__":
    # Ad-hoc manual checks for local development; uncomment as needed.
    # get_full_gaia_level1_data()
    print(get_file_from_gaia_level1_data("cca530fc-4052-43b2-b130-b30968d8aa44"))
    # print(get_question("cca530fc-4052-43b2-b130-b30968d8aa44"))
requirements.txt CHANGED
@@ -1,2 +1,11 @@
1
  gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
1
  gradio
2
+ requests
3
+ pandas
4
+ pyarrow
5
+ huggingface_hub
6
+ langchain-core
7
+ langchain-openai
8
+ langgraph
9
+ tavily-python
10
+ wikipedia
11
+ youtube-transcript-api
tools.py ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# std lib
import base64
import mimetypes
import os
from pathlib import Path
from typing import Optional

import requests

# 3rd party imports
import pandas as pd
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from tavily import TavilyClient
import wikipedia
from youtube_transcript_api import YouTubeTranscriptApi
15
+
16
+
17
+ openai_token = os.getenv("HF_FINAL_ASSIGNMENT_OPENAI")
18
+ tavily_api_key = os.getenv("HF_FINAL_ASSIGNMENT_TAVILY")
19
+
20
+ tavily_client = TavilyClient(api_key=tavily_api_key)
21
+ vision_llm = ChatOpenAI(model="gpt-5.2", api_key=openai_token, temperature=0)
22
+
23
+
24
def extract_text_from_image(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.
    Use this method only for image files.

    Args:
        img_path: A local image file path (strings).

    Returns:
        A single string containing the concatenated text extracted from each image.
    """
    try:
        # Load the image and embed it as base64 in a data URL for the model.
        raw_bytes = Path(img_path).read_bytes()
        encoded = base64.b64encode(raw_bytes).decode("utf-8")

        prompt = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{encoded}"},
                    },
                ]
            )
        ]

        # Ask the vision-capable model to do the OCR.
        reply = vision_llm.invoke(prompt)

        # Trailing separator then strip, matching the accumulator behavior.
        extracted = reply.content + "\n\n"
        return extracted.strip()
    except Exception as e:
        # Best-effort: log and return "" so the agent can keep going.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
74
+
75
+
76
def tavily_search(query: str) -> dict:
    """Search the web with Tavily; returns the raw response dict (contains a
    "results" list of {title, url, content, ...} entries), not plain text."""
    # "advanced" depth trades latency for better-quality snippets.
    response = tavily_client.search(query=query, search_depth="advanced")
    return response
80
+
81
+
82
def wikipedia_get_suggested_title_for_query(query: str) -> str:
    """Get the most relevant Wikipedia page title for a given query.

    Returns "" when Wikipedia has no suggestion or on any error.
    """
    try:
        # wikipedia.suggest returns None when it has nothing to offer.
        hint = wikipedia.suggest(query)
    except Exception as e:
        print(f"Error getting Wikipedia suggestion: {str(e)}")
        return ""
    return hint if hint else ""
90
+
91
+
92
def wikipedia_search_pages(query: str):
    """
    Search Wikipedia for a query and return relevant page titles,
    one per line, as a single plain-text string ("" on error).
    """
    try:
        titles = wikipedia.search(query)
    except Exception as e:
        print(f"Error searching Wikipedia: {str(e)}")
        return ""
    return "\n".join(titles)
102
+
103
+
104
def wikipedia_get_page_summary(page_title: str, lang: str = "en") -> str:
    """
    Get the summary of a Wikipedia page given its title.

    Args:
        page_title: Title of the Wikipedia page.
        lang: Wikipedia language edition to query (default "en").

    Returns:
        The page summary, or "" on any error.
    """
    try:
        # Bug fix: `lang` was accepted but never used. The wikipedia library
        # keeps the language as module-global state, so set it for this call
        # and restore the English default afterwards.
        wikipedia.set_lang(lang)
        try:
            summary = wikipedia.summary(page_title)
        finally:
            wikipedia.set_lang("en")
        return summary
    except Exception as e:
        print(f"Error getting Wikipedia page summary: {str(e)}")
        return ""
114
+
115
+
116
def wikipedia_get_page_full_content(page_title: str):
    """
    Get the full content of a Wikipedia page given its title.

    The `wikipedia.page` object also exposes other properties, e.g.:

        ny = wikipedia.page("New York")
        ny.title     -> u'New York'
        ny.url       -> u'http://en.wikipedia.org/wiki/NewYork'
        ny.content   -> u'New York is a state in the Northeastern region ...'
        ny.images[0] -> first image URL
        ny.links[0]  -> u'1790 United States Census'

    Returns:
        The plain-text article content, or "" on any error.
    """
    try:
        # .content is fetched lazily, so keep the access inside the try.
        article = wikipedia.page(page_title)
        return article.content
    except Exception as e:
        print(f"Error getting Wikipedia page content: {str(e)}")
        return ""
146
+
147
+
148
def _extract_youtube_video_id(video_url: str) -> str:
    """Extract the bare video id from common YouTube URL shapes.

    Bug fix: the previous `split("v=")[-1]` kept trailing query parameters
    (e.g. "...&t=30s") and failed on youtu.be short links and /embed/ URLs.
    """
    from urllib.parse import urlparse, parse_qs

    parsed = urlparse(video_url)
    # Short-link form: https://youtu.be/<id>
    if parsed.hostname == "youtu.be":
        return parsed.path.lstrip("/").split("/")[0]
    # Standard form: https://www.youtube.com/watch?v=<id>&...
    query_params = parse_qs(parsed.query)
    if "v" in query_params:
        return query_params["v"][0]
    # Embed / shorts forms: .../embed/<id>, .../shorts/<id>
    if "/embed/" in parsed.path or "/shorts/" in parsed.path:
        return parsed.path.rstrip("/").split("/")[-1]
    # Otherwise assume the caller already passed a bare video id.
    return video_url


def youtube_get_transcript_of_video(video_url: str):
    """
    Get the transcript of a YouTube video given its URL.

    Uses youtube_transcript_api, which returns a FetchedTranscript object:

        FetchedTranscript(
            snippets=[
                FetchedTranscriptSnippet(text="Hey there", start=0.0, duration=1.54),
                FetchedTranscriptSnippet(text="how are you", start=1.54, duration=4.16),
                # ...
            ],
            video_id="12345",
            language="English",
            language_code="en",
            is_generated=False,
        )

    The API must be called with the bare video id, not the full URL:
    do NOT run `YouTubeTranscriptApi().fetch("https://www.youtube.com/watch?v=1234")`,
    run `YouTubeTranscriptApi().fetch("1234")` instead.

    Args:
        video_url: Full YouTube URL (watch?v=..., youtu.be/..., /embed/...)
            or a bare video id.

    Returns:
        The FetchedTranscript for the video.
    """
    ytt_api = YouTubeTranscriptApi()
    video_id = _extract_youtube_video_id(video_url)
    return ytt_api.fetch(video_id)
188
+
189
+
190
def chessboard_image_to_text_description_to_fen_notation(
    image_path: str, color_to_move: str
) -> str:
    """
    Converts a chessboard image into a textual description of the position and its FEN notation.

    The prompt forces the model through an intermediate 8x8 table so that
    consecutive empty squares can be counted reliably before emitting FEN.
    Note: the returned string is the model's full reply (table + description
    + FEN), not the FEN alone.

    Args:
        image_path: A local image file path (string) representing the chessboard position.
        color_to_move: A string indicating which color is to move ("white" or "black").

    Returns:
        A string indicating the FEN notation of the chess position.
    """
    all_text = ""
    try:
        # Read image and encode as base64
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prepare the prompt including the base64 image data.
        # The adjacent string literals below concatenate into one instruction.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Draw a 8x8 table representing the chessboard."
                            "Describe the chess position rank by rank from rank 8 to rank 1. "
                            "For each rank, list what occupies each square from file a to file h. "
                            "One square at a time, complete the table with the piece occupying that square if any, or with '1' if the square is empty. "
                            "Once the table is complete, provide a textual description of the chessboard : uppercase letters for white pieces, lowercase letters for black pieces, and '1' for empty squares. "
                            "the values '1' in the table are helpful to determine the number of consecutive empty squares in a row, which is necessary to determine the FEN notation. "
                            "Based on this description, determine the FEN notation of the position."
                            "Reminder: for the FEN notation, start counting from rank 8 to rank 1, and for each rank, count from file a to file h."
                            "And if it is white to move, the FEN notation should end with 'w', and if it is black to move, the FEN notation should end with 'b'."
                            "Finally, the FEN notation should finish with the string '- - 0 1'"
                        ),
                    },
                    {
                        "type": "text",
                        "text": (f"It is {color_to_move} to move in this position."),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vision_llm.invoke(message)

        # Append extracted text
        all_text += response.content + "\n\n"

        print(f"Extracted table description: {all_text.strip()}")
        return all_text.strip()
    except Exception as e:
        # Best-effort tool: log and return "" rather than raising.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
255
+
256
+
257
def chessboard_get_fen_notation(image_path: str, color_to_move: str) -> str:
    """
    Converts digital chessboard image into Forsyth-Edwards notation (FEN) notation.

    Simpler variant of chessboard_image_to_text_description_to_fen_notation
    (no intermediate 8x8 table step); it is commented out of the `tools`
    registry below in favor of that variant. The returned string is the
    model's full reply, not the FEN alone.

    Args:
        image_path: A local image file path (string) representing the chessboard position.
        color_to_move: A string indicating which color is to move ("white" or "black").

    Returns:
        A string representing the chess position in FEN notation.
    """
    all_text = ""
    try:
        # Read image and encode as base64
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prepare the prompt including the base64 image data.
        # The adjacent string literals below concatenate into one instruction.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Describe the chess position rank by rank from rank 8 to rank 1. "
                            "For each rank, list what occupies each square from a to h. "
                            "Then convert your description to FEN notation."
                            "Reminder: for the FEN notation, start counting from rank 8 to rank 1, and for each rank, count from file a to file h."
                            "And if it is white to move, the FEN notation should end with 'w', and if it is black to move, the FEN notation should end with 'b'."
                            "Finally, the FEN notation should finish with the string '- - 0 1'"
                        ),
                    },
                    {
                        "type": "text",
                        "text": (f"It is {color_to_move} to move in this position."),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vision_llm.invoke(message)

        # Append extracted text
        all_text += response.content + "\n\n"

        print(f"Extracted FEN notation: {all_text.strip()}")
        return all_text.strip()
    except Exception as e:
        # Best-effort tool: log and return "" rather than raising.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
315
+
316
+
317
def get_best_next_move_from_fen(fen: str):
    """
    Requests the Lichess cloud-eval API for the best next move in a position.

    Args:
        fen: A string representing the chess position in Forsyth-Edwards Notation (FEN).

    Returns:
        The best move in UCI notation (e.g. "e2e4"), or "" on error.
    """
    lichess_api_url = "https://lichess.org/api/cloud-eval"

    try:
        # Bug fix: pass the FEN via `params` so its spaces and slashes get
        # URL-encoded (it was previously interpolated raw into the URL), and
        # add a timeout so the agent cannot hang forever on a stuck request.
        response = requests.get(lichess_api_url, params={"fen": fen}, timeout=30)
        if response.status_code == 200:
            data = response.json()
            pvs = data.get(
                "pvs", []
            )  # list of principal variations (best move sequences)
            if pvs and isinstance(pvs, list):
                best_move = (
                    pvs[0].get("moves", "").split()[0]
                )  # Get the first move of the best sequence
                return best_move
        else:
            print(f"Error fetching best move from Lichess API: {response.status_code}")
            return ""
    except Exception as e:
        print(f"Exception occurred while fetching best move from Lichess API: {str(e)}")
        return ""
344
+
345
+
346
def execute_python_code_with_subprocess(code: str) -> str:
    """
    Executes Python code in a subprocess and returns the output as a string.
    This can be used to execute code from the GAIA level 1 tasks in a safe environment.

    Args:
        code: A string containing the Python code to execute.

    Returns:
        The standard output of the executed code on success. On a non-zero
        exit code, an error message including stderr (previously stderr was
        silently discarded, hiding failures from the agent). On timeout or
        launch failure, a descriptive error message.
    """
    import subprocess
    import sys

    try:
        # Run in a fresh interpreter so the snippet cannot touch our state.
        result = subprocess.run(
            [sys.executable, "-c", code],
            capture_output=True,
            text=True,
            timeout=60,  # Set a timeout to prevent hanging
        )
        # Bug fix: surface failures instead of returning (empty) stdout.
        if result.returncode != 0:
            return (
                f"Error executing code (exit {result.returncode}): "
                f"{result.stderr.strip()}"
            )
        return result.stdout.strip()
    except subprocess.TimeoutExpired:
        return "Error: Code execution timed out."
    except Exception as e:
        return f"Error executing code: {str(e)}"
371
+
372
+
373
def transcribe_audio_file(audio_file_path: str) -> str:
    """
    Transcribes an audio file to text using OpenAI's gpt-4o-transcribe model.

    Args:
        audio_file_path: A string representing the local path to the audio file.

    Returns:
        A string containing the transcribed text from the audio file, or an error message if transcription fails.
    """
    # Imported lazily so the module loads even without the openai package.
    from openai import OpenAI

    client = OpenAI(api_key=openai_token)

    try:
        with open(audio_file_path, "rb") as audio_stream:
            result = client.audio.transcriptions.create(
                model="gpt-4o-transcribe",
                file=audio_stream,
                response_format="text",
            )
        return result.strip()
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"
393
+
394
+
395
def read_excel_file(file_path: str) -> str:
    """
    Reads an Excel file and returns its content as a string.

    Args:
        file_path: A string representing the local path to the Excel file.

    Returns:
        A string containing the content of the Excel file, or an error message if reading fails.
    """
    try:
        df = pd.read_excel(file_path)
        # Bug fix: the log claimed to print the shape but printed df.head().
        print(f"Excel file read successfully. DataFrame shape: {df.shape}")
        return df.to_string(index=False)
    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
409
+
410
+
411
def divide(a: float, b: float) -> float:
    """Return the quotient a / b (raises ZeroDivisionError when b is zero)."""
    quotient = a / b
    return quotient
414
+
415
+
416
def multiply(a: float, b: float) -> float:
    """Return the product of a and b."""
    product = a * b
    return product
419
+
420
+
421
def add(a: float, b: float) -> float:
    """Return the sum of a and b."""
    total = a + b
    return total
424
+
425
+
426
def subtract(a: float, b: float) -> float:
    """Return a minus b."""
    difference = a - b
    return difference
429
+
430
+
431
# Full tool registry offered to the agent by default;
# select_tools_for_input() below returns task-specific subsets of it.
tools = [
    extract_text_from_image,
    divide,
    multiply,
    add,
    subtract,
    tavily_search,
    wikipedia_get_suggested_title_for_query,
    wikipedia_search_pages,
    wikipedia_get_page_summary,
    wikipedia_get_page_full_content,
    youtube_get_transcript_of_video,
    # chessboard_get_fen_notation,  # disabled: table-based variant below is used instead
    get_best_next_move_from_fen,
    chessboard_image_to_text_description_to_fen_notation,
    execute_python_code_with_subprocess,
    transcribe_audio_file,
    read_excel_file,
]
450
+
451
+
452
def select_tools_for_input(input_file: Optional[str]):
    """Return the subset of tools relevant to the question's attachment.

    Args:
        input_file: Local path of the attached file, or None when the
            question has no attachment.

    Returns:
        A list of tool callables chosen by file extension.
    """
    extension = Path(input_file).suffix.lower() if input_file else ""

    # Spreadsheet attachments: data-crunching tools only.
    if extension in (".xls", ".xlsx"):
        print("Selecting tools for Excel file input.")
        return [
            read_excel_file,
            execute_python_code_with_subprocess,
            add,
            subtract,
            multiply,
            divide,
        ]

    # Image attachments: OCR only.
    if extension in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".gif"):
        return [extract_text_from_image]

    # General fallback: web search, Wikipedia, code execution, arithmetic.
    return [
        tavily_search,
        wikipedia_get_suggested_title_for_query,
        wikipedia_search_pages,
        wikipedia_get_page_summary,
        wikipedia_get_page_full_content,
        execute_python_code_with_subprocess,
        add,
        subtract,
        multiply,
        divide,
    ]