Markiian Tsalyk committed
Commit 12c47a4 · 1 Parent(s): 81917a3

LlamaIndex agent

__pycache__/agent.cpython-313.pyc ADDED
Binary file (1.75 kB)

__pycache__/f918266a-b3e0-4914-865d-4faa564f1aef.cpython-313.pyc ADDED
Binary file (367 Bytes)

__pycache__/open_router_chat.cpython-313.pyc ADDED
Binary file (1.62 kB)

__pycache__/tools.cpython-313.pyc ADDED
Binary file (10.2 kB)

__pycache__/wikipedia_tables_parser.cpython-313.pyc ADDED
Binary file (4.68 kB)
app.py CHANGED
@@ -3,6 +3,8 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
+from llama_index_agent import LlamaIndexAgent
+
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -10,25 +12,18 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        fixed_answer = "This is a default answer."
-        print(f"Agent returning fixed answer: {fixed_answer}")
-        return fixed_answer
-
-def run_and_submit_all( profile: gr.OAuthProfile | None):
+
+
+def run_and_submit_all(profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
+    space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
 
     if profile:
-        username= f"{profile.username}"
+        username = f"{profile.username}"
         print(f"User logged in: {username}")
     else:
         print("User not logged in.")
@@ -40,7 +35,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
+        agent = LlamaIndexAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
@@ -55,16 +50,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         response.raise_for_status()
         questions_data = response.json()
         if not questions_data:
-            print("Fetched questions list is empty.")
-            return "Fetched questions list is empty or invalid format.", None
+            print("Fetched questions list is empty.")
+            return "Fetched questions list is empty or invalid format.", None
         print(f"Fetched {len(questions_data)} questions.")
     except requests.exceptions.RequestException as e:
         print(f"Error fetching questions: {e}")
         return f"Error fetching questions: {e}", None
     except requests.exceptions.JSONDecodeError as e:
-        print(f"Error decoding JSON response from questions endpoint: {e}")
-        print(f"Response text: {response.text[:500]}")
-        return f"Error decoding server response for questions: {e}", None
+        print(f"Error decoding JSON response from questions endpoint: {e}")
+        print(f"Response text: {response.text[:500]}")
+        return f"Error decoding server response for questions: {e}", None
     except Exception as e:
         print(f"An unexpected error occurred fetching questions: {e}")
         return f"An unexpected error occurred fetching questions: {e}", None
@@ -76,23 +71,44 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        file_name = item.get("file_name")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            if len(file_name) > 0:
+                question_text = f"{question_text}\nAttached file: {file_name}"
             submitted_answer = agent(question_text)
-            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+            answers_payload.append(
+                {"task_id": task_id, "submitted_answer": submitted_answer}
+            )
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": submitted_answer,
+                }
+            )
         except Exception as e:
-            print(f"Error running agent on task {task_id}: {e}")
-            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+            print(f"Error running agent on task {task_id}: {e}")
+            results_log.append(
+                {
+                    "Task ID": task_id,
+                    "Question": question_text,
+                    "Submitted Answer": f"AGENT ERROR: {e}",
+                }
+            )
 
     if not answers_payload:
         print("Agent did not produce any answers to submit.")
         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
 
-    # 4. Prepare Submission
-    submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+    # 4. Prepare Submission
+    submission_data = {
+        "username": username.strip(),
+        "agent_code": agent_code,
+        "answers": answers_payload,
+    }
     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
     print(status_update)
 
@@ -162,20 +178,19 @@ with gr.Blocks() as demo:
 
     run_button = gr.Button("Run Evaluation & Submit All Answers")
 
-    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+    status_output = gr.Textbox(
+        label="Run Status / Submission Result", lines=5, interactive=False
+    )
     # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
-    run_button.click(
-        fn=run_and_submit_all,
-        outputs=[status_output, results_table]
-    )
+    run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
 if __name__ == "__main__":
-    print("\n" + "-"*30 + " App Starting " + "-"*30)
+    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
     # Check for SPACE_HOST and SPACE_ID at startup for information
     space_host_startup = os.getenv("SPACE_HOST")
-    space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
+    space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
 
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
@@ -183,14 +198,18 @@ if __name__ == "__main__":
     else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
 
-    if space_id_startup: # Print repo URLs if SPACE_ID is found
+    if space_id_startup:  # Print repo URLs if SPACE_ID is found
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+        print(
+            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
+        )
    else:
-        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+        print(
+            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
+        )
 
-    print("-"*(60 + len(" App Starting ")) + "\n")
+    print("-" * (60 + len(" App Starting ")) + "\n")
 
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)
+    demo.launch(debug=True, share=False)
f918266a-b3e0-4914-865d-4faa564f1aef.py ADDED
@@ -0,0 +1,5 @@
+def simple_func():
+    return 124.5
+
+
+print(simple_func())
llama_index_agent.py ADDED
@@ -0,0 +1,175 @@
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+
+from llama_index.core.agent import ReActAgent
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.openrouter import OpenRouter
+
+import tools
+
+
+class LlamaIndexAgent:
+    def __init__(
+        self,
+        # model_name: str = "meta-llama/llama-4-maverick:free",
+        # model_name: str = "meta-llama/llama-4-scout:free",
+        # model_name: str = "microsoft/phi-4-reasoning-plus:free",
+        model_name: str = "google/gemini-2.5-flash-preview",
+        temperature: float = 0.7,
+        verbose: bool = True,
+    ):
+        """
+        Initialize the LlamaIndex agent with an OpenRouter LLM.
+
+        Args:
+            model_name: Model name to use from OpenRouter
+            temperature: Temperature setting for the LLM
+            verbose: Whether to output verbose logs
+        """
+        self.llm = OpenRouter(
+            api_key=os.getenv("OPENROUTER_API_KEY"),
+            model=model_name,
+            temperature=temperature,
+        )
+
+        # Define tools
+        reverse_tool = FunctionTool.from_defaults(
+            fn=tools.reverse_text,
+            name="reverse_text",
+            description="Reverses the given text",
+        )
+
+        final_answer_tool = FunctionTool.from_defaults(
+            fn=tools.final_answer,
+            name="final_answer",
+            description="Use this to provide your final answer to the user's question",
+        )
+        web_search_tool = FunctionTool.from_defaults(
+            fn=tools.web_search,
+            name="web_search",
+            description="Use this to search the web for the given query",
+        )
+        wikipedia_search_tool = FunctionTool.from_defaults(
+            fn=tools.wikipedia_search,
+            name="wikipedia_search",
+            description="Use this to search Wikipedia for the given query",
+        )
+        multiply_tool = FunctionTool.from_defaults(
+            fn=tools.multiply,
+            name="multiply",
+            description="Use this to multiply two numbers",
+        )
+        length_tool = FunctionTool.from_defaults(
+            fn=tools.length,
+            name="length",
+            description="Use this to get the length of an iterable",
+        )
+        execute_python_file_tool = FunctionTool.from_defaults(
+            fn=tools.execute_python_file,
+            name="execute_python_file",
+            description="Use this to execute a Python file",
+        )
+        transcript_youtube_tool = FunctionTool.from_defaults(
+            fn=tools.trascript_youtube,
+            name="transcript_youtube",
+            description="Use this to get the transcript of a YouTube video",
+        )
+        classify_fruit_vegitable_tool = FunctionTool.from_defaults(
+            fn=tools.classify_fruit_vegitable,
+            name="classify_fruit_vegitable",
+            description="Use this to classify items as fruits or vegetables",
+        )
+        fetch_historical_event_data_tool = FunctionTool.from_defaults(
+            fn=tools.fetch_historical_event_data,
+            name="fetch_historical_event_data",
+            description="Use this to fetch data about a historical event that occurred in a certain year, such as the Olympic Games, football games, the NBA, etc.",
+        )
+
+        # Create the agent
+        self.agent = ReActAgent.from_tools(
+            [
+                reverse_tool,
+                final_answer_tool,
+                web_search_tool,
+                wikipedia_search_tool,
+                multiply_tool,
+                length_tool,
+                execute_python_file_tool,
+                transcript_youtube_tool,
+                classify_fruit_vegitable_tool,
+                fetch_historical_event_data_tool,
+            ],
+            llm=self.llm,
+            verbose=verbose,
+            max_iterations=20,
+            system_prompt="""
+            You are a helpful AI assistant that can use tools to answer the user's questions.
+            You have a set of tools that you are free to use.
+            When you have the complete answer to the user's question, always use the final_answer tool to present it.
+            """,
+        )
+
+        self.small_agent = ReActAgent.from_tools(
+            [final_answer_tool],
+            llm=self.llm,
+            verbose=verbose,
+            max_iterations=10,
+            system_prompt="You are asked to prepare the answer to the user's question in the desired format. You always need to use the final_answer tool; it will help you.",
+        )
+
+    def __call__(self, query_text: str, **kwds) -> str:
+        """
+        Process a user query through the agent.
+
+        Args:
+            query_text: User's query text
+
+        Returns:
+            The agent's response
+        """
+        try:
+            response = self.agent.chat(query_text).response
+        except Exception:
+            response = ""
+        final_response = self.small_agent.chat(
+            f"Question: {query_text}\nResponse: {response}"
+        )
+
+        return final_response.response
+
+
+if __name__ == "__main__":
+    agent = LlamaIndexAgent()
+
+    # Queries
+    example_queries = [
+        # '.rewsna eht sa "tfel" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI',
+        # "What is the weather in Lviv now?",
+        # "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.",
+        # "Given this table defining * on the set S = {a, b, c, d, e}\n\n|*|a|b|c|d|e|\n|---|---|---|---|---|---|\n|a|a|b|c|b|d|\n|b|b|c|a|e|c|\n|c|c|a|b|b|a|\n|d|b|e|b|e|d|\n|e|d|b|a|d|c|\n\nprovide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.",
+        # "What is the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists?"
+        # "Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
+        "What country had the least number of athletes at the 1928 Summer Olympics? If there's a tie for a number of athletes, return the first in alphabetical order. Give the IOC country code as your answer."
+        # "Where were the Vietnamese specimens described by Kuznetzov in Nedoshivina's 2010 paper eventually deposited? Just give me the city name without abbreviations.",
+        # "How many at bats did the Yankee with the most walks in the 1977 regular season have that same season?",
+        # "Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.",
+        # "Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?",
+        # "What is the final numeric output from the attached Python code? File name: f918266a-b3e0-4914-865d-4faa564f1aef.py",
+        # """Examine the video at https://www.youtube.com/watch?v=1htKBjuUWec.\n\nWhat does Teal'c say in response to the question \"Isn't that hot?\"""",
+        # "What is the surname of the equine veterinarian mentioned in 1.E Exercises from the chemistry materials licensed by Marisa Alviar-Agnew & Henry Agnew under the CK-12 license in LibreText's Introductory Chemistry materials as compiled 08/21/2023?",
+        # """
+        # I'm making a grocery list for my mom, but she's a professor of botany and she's a real stickler when it comes to categorizing things. I need to add different foods to different categories on the grocery list, but if I make a mistake, she won't buy anything inserted in the wrong category. Here's the list I have so far:\n\nmilk, eggs, flour, whole bean coffee, Oreos, sweet potatoes, fresh basil, plums, green beans, rice, corn, bell pepper, whole allspice, acorns, broccoli, celery, zucchini, lettuce, peanuts\n\nI need to make headings for the fruits and vegetables. Could you please create a list of just the vegetables from my list? If you could do that, then I can figure out how to categorize the rest of the list into the appropriate categories. But remember that my mom is a real stickler, so make sure that no botanical fruits end up on the vegetable list, or she won't get them when she's at the store. Please alphabetize the list of vegetables, and place each item in a comma separated list.
+        # """,
+        # """
+        # On June 6, 2023, an article by Carolyn Collins Petersen was published in Universe Today. This article mentions a team that produced a paper about their observations, linked at the bottom of the article. Find this paper. Under what NASA award number was the work performed by R. G. Arendt supported by?
+        # """,
+    ]
+
+    for query in example_queries:
+        print(f"\nQuery: {query}")
+        response = agent(query)
+        print(f"Response: {response}")
requirements.txt CHANGED
@@ -1,2 +1,14 @@
 gradio
-requests
+gradio[oauth]
+requests
+pandas
+smolagents
+openai
+langchain
+langchain-openai
+langchain-community
+llama-index
+llama-index-llms-openrouter
+wikipedia
+youtube-transcript-api
+python-dotenv
tools.py ADDED
@@ -0,0 +1,272 @@
+from smolagents import DuckDuckGoSearchTool
+from youtube_transcript_api import YouTubeTranscriptApi
+import wikipedia
+from wikipedia_tables_parser import fetch_wikipedia_tables
+import pandas as pd
+from typing import Any
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+import importlib.util
+import sys
+import io
+import contextlib
+from llama_index.llms.openrouter import OpenRouter
+from llama_index.core.types import ChatMessage
+
+
+llm = OpenRouter(
+    api_key=os.getenv("OPENROUTER_API_KEY"),
+    model="google/gemini-2.5-flash-preview",
+    temperature=0.7,
+)
+
+
+def reverse_text(text: str, **kwargs) -> str:
+    """
+    Returns the reversed version of the text.
+    If you receive some unknown text that can't be recognized and analyzed, use this tool to make it clear.
+
+    Args:
+        text: text to be reversed
+
+    Return:
+        The reversed text.
+    """
+    try:
+        print(text[::-1])
+        return text[::-1]
+    except Exception as e:
+        raise ValueError(f"Can't reverse text: {e}")
+
+
+def fetch_historical_event_data(event_name: str, year: str, **kwargs) -> str:
+    """
+    Fetches data about a historical event that occurred in a certain year.
+    Some examples of events: the Olympic Games, football games, the NBA, etc.
+
+    Args:
+        event_name: String name of the event
+        year: String year of the event
+
+    Return:
+        String with data about the event
+    """
+    result = wikipedia.page(f"{event_name} in {year}")
+
+    url = result.url
+    content = result.content
+    try:
+        tables = pd.read_html(url)
+    except Exception:
+        tables = fetch_wikipedia_tables(url)
+
+    result = f"Content: {content}\nTables: {tables}"
+
+    return result
+
+
+def classify_fruit_vegitable(item: str, **kwargs) -> str:
+    """
+    Classifies items as fruits or vegetables.
+
+    Args:
+        item: Item to classify
+
+    Returns:
+        Text stating whether it is a fruit or a vegetable.
+    """
+    response = llm.chat(
+        messages=[
+            ChatMessage(
+                content=f"Classify whether it is fruit or vegetable: {item}. Return only `fruit` or `vegetable` without explanations"
+            )
+        ]
+    )
+    return response.message.content
+
+
+def web_search(query: str, **kwargs) -> str:
+    """
+    Returns web search results for the provided query.
+    Don't use it for Wikipedia queries. For Wikipedia queries use the wikipedia_search tool.
+    Important: query is a human-language string, not a URL or key.
+
+    Args:
+        query: query to search the web for
+
+    Return:
+        String with web search results.
+    """
+    result = DuckDuckGoSearchTool().forward(query)
+    print(result)
+    return result
+
+
+def wikipedia_search(query: str, **kwargs) -> Any:
+    """
+    Returns Wikipedia search results for the provided query.
+
+    Args:
+        query: query to search Wikipedia for
+
+    Return:
+        Wikipedia search results.
+    """
+    result = wikipedia.page(query)
+
+    url = result.url
+    content = result.content
+    try:
+        tables = pd.read_html(url)
+    except Exception:
+        tables = fetch_wikipedia_tables(url)
+
+    result = f"Content: {content}\nTables: {tables}"
+
+    return result
+
+
+def multiply(a: float, b: float, **kwargs) -> float:
+    """
+    Multiply two numbers.
+
+    Args:
+        a: First number
+        b: Second number
+
+    Return:
+        The product of the two numbers.
+    """
+    return a * b
+
+
+def length(iterable: Any, **kwargs) -> int:
+    """
+    Return the length of an iterable.
+
+    Args:
+        iterable: Any iterable
+
+    Return:
+        The length of the iterable.
+    """
+    return len(iterable)
+
+
+def execute_python_file(file_path: str) -> Any:
+    """
+    Executes a Python file and returns its result.
+
+    This function takes a path to a Python file, executes it by importing it as a module,
+    and returns the result. The file should contain a function call that produces
+    the result to be returned.
+
+    Args:
+        file_path (str): Path to the Python file to execute.
+
+    Returns:
+        Any: The result of executing the Python file. If the file sets a variable
+            named 'result', that value will be returned.
+
+    Raises:
+        FileNotFoundError: If the specified file does not exist.
+        ImportError: If there was an error importing the Python file.
+
+    Example:
+        >>> # If example.py contains: result = 2 + 3
+        >>> execute_python_file('example.py')
+        5
+    """
+    # Verify file exists
+    if not os.path.isfile(file_path):
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    # Get the directory and filename
+    file_dir = os.path.dirname(os.path.abspath(file_path))
+    file_name = os.path.basename(file_path)
+    module_name = file_name.replace(".py", "")
+
+    # Store original sys.path and add the file's directory
+    original_sys_path = sys.path.copy()
+    sys.path.insert(0, file_dir)
+
+    # Prepare stdout/stderr capture
+    stdout_capture = io.StringIO()
+    stderr_capture = io.StringIO()
+
+    # Store the original __main__ module
+    original_main = sys.modules.get("__main__")
+
+    try:
+        spec = importlib.util.spec_from_file_location(module_name, file_path)
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Could not load module spec from {file_path}")
+
+        module = importlib.util.module_from_spec(spec)
+
+        sys.modules[module_name] = module
+
+        # Execute the module
+        with contextlib.redirect_stdout(stdout_capture), contextlib.redirect_stderr(
+            stderr_capture
+        ):
+            spec.loader.exec_module(module)
+
+        if hasattr(module, "result"):
+            return module.result
+        else:
+            print(f"RESULT PYTHON: {stdout_capture.getvalue().strip()}")
+            return stdout_capture.getvalue().strip()
+
+    except Exception as e:
+        error_output = stderr_capture.getvalue()
+        if error_output:
+            raise type(e)(f"{str(e)}\nProgram output: {error_output}") from None
+        else:
+            raise
+    finally:
+        sys.path = original_sys_path
+
+        if module_name in sys.modules:
+            del sys.modules[module_name]
+
+
+def trascript_youtube(video_id: str, **kwargs) -> list:
+    """
+    Returns the transcript of a YouTube video.
+
+    Args:
+        video_id: ID of the YouTube video (pass in the video ID, NOT the video URL; for a video with the URL https://www.youtube.com/watch?v=12345 the ID is 12345.)
+
+    Return:
+        Transcript of the YouTube video.
+    """
+    ytt_api = YouTubeTranscriptApi()
+    result = ytt_api.fetch(video_id)
+
+    return result.snippets
+
+
+def final_answer(query: str, answer: str, **kwargs) -> str:
+    """
+    Prepare the final answer for the user. It should always be used as the last step.
+
+    Args:
+        query: The initial query of the user
+        answer: The answer to format and return to the user
+    Return:
+        The final answer.
+    """
+    return f"""
+    User query: {query}
+    Final answer from agent: {answer}
+    Adapt the final answer to the user's request.
+    An exact number might be requested; in that case compress the output so that it is only the number, without any comments or explanations (float or integer).
+    On the other hand, the question might request some exact string value. Don't explain it, just return this value (for example, instead of `In response to the question, the desired person is X` return only `X`).
+    """
+
+
+# print(wikipedia_search("Mercedes Sosa studio albums"))
+# execute_python_file("f918266a-b3e0-4914-865d-4faa564f1aef.py")
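
As a quick sanity-check sketch, execute_python_file can be pointed at the small script added in this commit; since that script defines no module-level `result` variable, its captured stdout ("124.5") should be returned. Note that importing tools constructs an OpenRouter client at module load, so OPENROUTER_API_KEY and the requirements.txt dependencies are assumed to be available.

    from tools import execute_python_file

    output = execute_python_file("f918266a-b3e0-4914-865d-4faa564f1aef.py")
    print(output)  # expected: "124.5" (the script's printed output)
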
wikipedia_tables_parser.py ADDED
@@ -0,0 +1,106 @@
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+
+
+def fetch_wikipedia_tables(
+    url: str,
+    handle_special_chars: bool = True,
+) -> list[pd.DataFrame]:
+    """
+    Fetch tables from a Wikipedia URL with robust error handling.
+
+    Parameters:
+    -----------
+    url : str
+        The Wikipedia URL to fetch tables from.
+    handle_special_chars : bool, default True
+        Whether to clean special characters in data before parsing.
+
+    Returns:
+    --------
+    list of pd.DataFrame
+        A list of pandas DataFrames containing the tables found on the page.
+    """
+    try:
+        all_tables = _fetch_tables_with_bs4(url)
+
+        if handle_special_chars:
+            # Clean tables to handle special characters and formatting issues
+            for i, table in enumerate(all_tables):
+                all_tables[i] = _clean_table(table)
+
+        if all_tables:
+            return all_tables
+        else:
+            print(f"No tables found at {url}")
+            return []
+    except Exception as e:
+        print(f"Error fetching tables: {e}")
+        return []
+
+
+def _fetch_tables_with_bs4(url: str) -> list[pd.DataFrame]:
+    """Fetch tables using BeautifulSoup."""
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.content, "html.parser")
+        tables = []
+
+        for table in soup.find_all("table", {"class": "wikitable"}):
+            data = []
+            headers = []
+
+            # Extract headers
+            for th in table.find_all("th"):
+                headers.append(th.text.strip())
+
+            # If no headers found in th tags, try first tr
+            if not headers and table.find("tr"):
+                for td in table.find("tr").find_all(["th", "td"]):
+                    headers.append(td.text.strip())
+
+            # Extract rows
+            for row in table.find_all("tr")[1:] if headers else table.find_all("tr"):
+                row_data = []
+                for cell in row.find_all(["td", "th"]):
+                    row_data.append(cell.text.strip())
+                if row_data:  # Skip empty rows
+                    data.append(row_data)
+
+            # Create DataFrame
+            if data:
+                if headers and len(headers) == len(data[0]):
+                    df = pd.DataFrame(data, columns=headers)
+                else:
+                    df = pd.DataFrame(data)
+                tables.append(df)
+
+        return tables
+    except Exception as e:
+        print(f"Error in BeautifulSoup fallback: {e}")
+        return []
+
+
+def _clean_table(df: pd.DataFrame) -> pd.DataFrame:
+    """Clean a table by handling special characters and formatting issues."""
+    # Make a copy to avoid modifying the original
+    df = df.copy()
+
+    # Handle all string columns
+    for col in df.columns:
+        if df[col].dtype == "object":
+            # Replace common problematic characters
+            df[col] = df[col].astype(str).str.replace(";", "", regex=False)
+            df[col] = df[col].str.replace("−", "-", regex=False)  # Replace minus sign
+            df[col] = df[col].str.replace(
+                "\xa0", " ", regex=False
+            )  # Replace non-breaking space
+            df[col] = df[col].str.replace("\n", " ", regex=False)  # Replace newlines
+            df[col] = df[col].str.strip()  # Strip whitespace
+
+            # Remove reference tags like [1], [2], etc.
+            df[col] = df[col].str.replace(r"\[\d+\]", "", regex=True)
+
+    return df
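
A short sketch of how tools.py uses this parser: pandas.read_html is tried first, and fetch_wikipedia_tables serves as the BeautifulSoup-based fallback. The Wikipedia URL below is only an illustrative example.

    import pandas as pd
    from wikipedia_tables_parser import fetch_wikipedia_tables

    url = "https://en.wikipedia.org/wiki/1928_Summer_Olympics"
    try:
        tables = pd.read_html(url)
    except Exception:
        tables = fetch_wikipedia_tables(url)  # fallback path used in tools.py
    print(f"Found {len(tables)} tables")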