jesusgj committed on
Commit
892cc72
·
1 Parent(s): 80ef074

Modified files

Browse files
Files changed (1) hide show
  1. agent.py +251 -153
agent.py CHANGED
@@ -1,177 +1,275 @@
1
  import os
2
- import re
3
- import requests
4
- import serpapi
5
  import time
6
- from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool
7
- from smolagents import InferenceClientModel
 
 
 
 
 
 
 
 
8
  from dotenv import load_dotenv
9
- from markdownify import markdownify
10
  from requests.exceptions import RequestException
 
11
  from llama_index.core import VectorStoreIndex, download_loader
12
  from llama_index.core.schema import Document
13
- from youtube_transcript_api import YouTubeTranscriptApi
 
 
14
 
15
- search_cache = {}
16
- webpage_cache = {}
17
- MAX_RETRIES = 3
18
- INITIAL_DELAY = 1 # seconds
19
 
20
- def initialize_agent():
21
- # Load environment variables from .env file
 
 
 
 
 
 
 
 
22
  load_dotenv()
 
 
 
 
 
 
 
23
 
24
- # 1. Load the model
25
- # Make sure to set TOGETHER_API_KEY in your environment variables
26
- model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"
27
- try:
28
- model = InferenceClientModel(model_id=model_name, token=os.environ.get("TOGETHER_API_KEY"), provider="together")
29
- except Exception as e:
30
- print(f"Error loading model: {e}")
31
- model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # 2. Define the tools
34
  @tool
35
  def query_webpage(url: str, query: str) -> str:
36
- """Queries a webpage at the given URL to find specific information and returns a concise answer.
 
 
 
 
 
37
 
38
- Args:
39
- url: The URL of the webpage to query.
40
- query: The specific question to ask about the content of the webpage.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- Returns:
43
- A concise answer to the query based on the webpage's content, or an error message.
 
44
  """
45
- if (url, query) in webpage_cache:
46
- return webpage_cache[(url, query)]
47
-
48
- for i in range(MAX_RETRIES):
49
- try:
50
- BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
51
- loader = BeautifulSoupWebReader()
52
- documents = loader.load_data(urls=[url])
53
- index = VectorStoreIndex.from_documents(documents)
54
- query_engine = index.as_query_engine()
55
- response = query_engine.query(query)
56
- webpage_cache[(url, query)] = str(response)
57
- return str(response)
58
-
59
- except Exception as e:
60
- if i < MAX_RETRIES - 1:
61
- delay = INITIAL_DELAY * (2 ** i)
62
- print(f"Error querying webpage: {str(e)}. Retrying in {delay} seconds...")
63
- time.sleep(delay)
64
- else:
65
- return f"An unexpected error occurred after multiple retries: {str(e)}"
66
 
 
67
  @tool
68
- def query_youtube_video(video_id: str, query: str) -> str:
69
- """Queries a YouTube video's transcript to find specific information and returns a concise answer.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
- Args:
72
- video_id: The ID of the YouTube video.
73
- query: The specific question to ask about the content of the video transcript.
 
 
 
 
 
 
 
 
 
74
 
75
- Returns:
76
- A concise answer to the query based on the video transcript, or an error message.
77
- """
78
- if (video_id, query) in webpage_cache: # Using webpage_cache for simplicity
79
- return webpage_cache[(video_id, query)]
80
-
81
- for i in range(MAX_RETRIES):
82
- try:
83
- transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
84
- transcript_text = " ".join([t['text'] for t in transcript_list])
85
-
86
- documents = [Document(text=transcript_text)]
87
- index = VectorStoreIndex.from_documents(documents)
88
- query_engine = index.as_query_engine()
89
- response = query_engine.query(query)
90
- webpage_cache[(video_id, query)] = str(response)
91
- return str(response)
92
-
93
- except Exception as e:
94
- if i < MAX_RETRIES - 1:
95
- delay = INITIAL_DELAY * (2 ** i)
96
- print(f"Error querying YouTube video: {str(e)}. Retrying in {delay} seconds...")
97
- time.sleep(delay)
98
- else:
99
- return f"An unexpected error occurred after multiple retries: {str(e)}"
100
 
101
- @tool
102
- def google_search(query: str) -> str:
103
- """Searches Google for the given query and returns the results.
 
 
 
104
 
105
- Args:
106
- query: The query to search for.
107
 
108
- Returns:
109
- The search results, or an error message if the search fails.
110
- """
111
- if query in search_cache:
112
- return search_cache[query]
113
-
114
- for i in range(MAX_RETRIES):
115
- try:
116
- client = serpapi.Client(api_key=os.environ.get("SERPAPI_API_KEY"))
117
- results = client.search(q=query, engine="google")
118
- if "ai_overview" in results:
119
- ai_overview = results["ai_overview"]
120
- output = ""
121
- for block in ai_overview.get("text_blocks", []):
122
- if block["type"] == "paragraph":
123
- output += block["snippet"] + "\n\n"
124
- elif block["type"] == "heading":
125
- output += f"### {block['snippet']}\n\n"
126
- elif block["type"] == "list":
127
- for item in block["list"]:
128
- output += f"- **{item['title']}** {item['snippet']}\n"
129
- output += "\n"
130
- if "references" in ai_overview:
131
- output += "\n**References:**\n"
132
- for ref in ai_overview["references"]:
133
- output += f"- [{ref['title']}]({ref['link']})\n"
134
- search_cache[query] = output
135
- return output
136
- elif "organic_results" in results:
137
- result = str(results["organic_results"])
138
- search_cache[query] = result
139
- return result
140
- else:
141
- return "No results found."
142
- except Exception as e:
143
- if i < MAX_RETRIES - 1:
144
- delay = INITIAL_DELAY * (2 ** i)
145
- print(f"Error performing Google search: {str(e)}. Retrying in {delay} seconds...")
146
- time.sleep(delay)
147
- else:
148
- return f"Error performing Google search after multiple retries: {str(e)}"
149
-
150
- # 3. Define the agents
151
- if model:
152
- web_agent = ToolCallingAgent(
153
- tools=[WebSearchTool(), query_webpage, query_youtube_video, google_search],
154
- model=model,
155
- max_steps=10,
156
- name="web_search_agent",
157
- description="Runs web searches for you.",
158
- )
159
 
160
- manager_agent = CodeAgent(
161
- tools=[],
162
- model=model,
163
- managed_agents=[web_agent],
164
- additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse", "youtube_transcript_api", "together"],
165
- instructions='''You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the a new line and the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
166
-
167
- To achieve the best results, follow these steps:
168
- 1. **Understand the Question:** Carefully read and analyze the user's question to identify the core task and any specific constraints (e.g., format, type of answer).
169
- 2. **Formulate a Plan:** Based on the question, devise a step-by-step plan. This might involve using web search, querying webpages, or analyzing YouTube videos. Consider what information is needed and which tool is best suited to obtain it.
170
- 3. **Execute Tools:** Prioritize using `WebSearchTool()` for general web searches. If `WebSearchTool()` fails, or if more specific, structured search results are required (e.g., for AI overviews or specific data points), then use `google_search` (SerpApi). Use `query_webpage` for detailed information extraction from specific URLs and `query_youtube_video` for YouTube transcript analysis. Be mindful of rate limits and use caching effectively.
171
- 4. **Synthesize Information:** Combine and process the information obtained from the tools to formulate a comprehensive answer. If the question requires specific data extraction, ensure accuracy.
172
- 5. **Format the Final Answer:** Adhere strictly to the specified FINAL ANSWER template. Ensure the answer type (number, string, comma-separated list) matches the question's requirement.
173
- 6. **Self-Correction:** If initial attempts fail or produce unsatisfactory results, re-evaluate the plan and try alternative approaches or tools.'''
174
- )
175
- return manager_agent
176
- else:
177
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
 
 
2
  import time
3
+ import logging
4
+ import urllib.parse as urlparse
5
+ import io
6
+ import contextlib
7
+ from functools import lru_cache, wraps
8
+
9
+ # Add necessary imports for new tools
10
+ import chess
11
+ from stockfish import Stockfish
12
+
13
  from dotenv import load_dotenv
 
14
  from requests.exceptions import RequestException
15
+ import serpapi
16
  from llama_index.core import VectorStoreIndex, download_loader
17
  from llama_index.core.schema import Document
18
+ from youtube_transcript_api import YouTubeTranscriptApi, YouTubeTranscriptApiError
19
+ from smolagents import CodeAgent, ToolCallingAgent, WebSearchTool, tool
20
+ from smolagents import InferenceClientModel
21
 
22
+ # --- Configuration and Setup ---
 
 
 
23
 
24
def configure_logging():
    """Initialise root-logger formatting for the whole process."""
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    logging.basicConfig(level=logging.INFO, format=log_format, datefmt=date_format)
31
+
32
def load_api_keys():
    """Load required API keys from the environment (.env files supported).

    Returns:
        dict: {'together': ..., 'serpapi': ...} with the key values.

    Raises:
        ValueError: naming exactly which key(s) are missing, so the user can
            fix the .env file without guessing.
    """
    load_dotenv()
    keys = {
        'together': os.getenv('TOGETHER_API_KEY'),
        'serpapi': os.getenv('SERPAPI_API_KEY'),
    }
    # Report every missing key by name instead of a generic message.
    missing = [name for name, value in keys.items() if not value]
    if missing:
        raise ValueError(
            f"Missing API key(s): {', '.join(missing)}. Please check your .env file."
        )
    return keys
42
 
43
+ # --- Decorators ---
44
+
45
def _retryable_exceptions():
    """Resolve the exception classes worth retrying, tolerating absent deps.

    The previous code referenced SerpApiClientException as a bare global that
    was only imported inside main(); when initialize_agent() is used from
    app.py that name is undefined and the retry wrapper itself raises
    NameError on the first caught exception. Resolving the classes here, with
    a graceful fallback, fixes that and keeps the decorator usable on its own.
    NOTE(review): `YouTubeTranscriptApiError` may not exist in some versions
    of youtube_transcript_api -- the AttributeError guard covers that too.
    """
    classes = []
    for module_name, attr in (
        ("requests.exceptions", "RequestException"),
        ("serpapi.client", "SerpApiClientException"),
        ("youtube_transcript_api", "YouTubeTranscriptApiError"),
    ):
        try:
            module = __import__(module_name, fromlist=[attr])
            classes.append(getattr(module, attr))
        except (ImportError, AttributeError):
            # Optional dependency (or exception name) unavailable -- skip it.
            pass
    # Always retry plain stdlib network-ish errors as a safety net.
    classes.extend([ConnectionError, TimeoutError])
    return tuple(classes)


def retry(max_retries=3, initial_delay=1, backoff=2):
    """A robust retry decorator with exponential backoff.

    Args:
        max_retries: total number of attempts before giving up.
        initial_delay: seconds to wait after the first failed attempt.
        backoff: multiplier applied to the delay after each failure.

    Retryable (network/API) exceptions are retried and re-raised only after
    the final attempt; everything else is logged and re-raised immediately.
    """
    retryable = _retryable_exceptions()

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            delay = initial_delay
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except retryable as e:
                    if attempt == max_retries:
                        logging.error(f"{func.__name__} failed after {attempt} attempts: {e}")
                        raise
                    logging.warning(
                        f"Attempt {attempt} for {func.__name__} failed: {e}. "
                        f"Retrying in {delay} seconds..."
                    )
                    time.sleep(delay)
                    delay *= backoff
                except Exception as e:
                    logging.error(f"{func.__name__} failed with a non-retryable error: {e}")
                    raise
        return wrapper
    return decorator
68
+
69
+ # --- Main Agent Initialization (as called by app.py) ---
70
+
71
def initialize_agent():
    """Build and return the manager `CodeAgent` used by app.py.

    Wires together:
      * lru-cached LlamaIndex indices for webpages and YouTube transcripts,
      * tools for web search, webpage/video Q&A, code execution and chess,
      * a ToolCallingAgent worker plus a CodeAgent manager that delegates.

    Raises:
        ValueError: if required API keys are missing (via load_api_keys).
        Exception: re-raised if the inference model cannot be loaded.
    """
    api_keys = load_api_keys()

    # --- Caching Layer for LlamaIndex ---
    # lru_cache keys on the URL / video id, so repeated questions about the
    # same source skip re-downloading and re-indexing.
    @lru_cache(maxsize=32)
    @retry()
    def get_webpage_index(url: str) -> VectorStoreIndex:
        """Download a webpage and build (or reuse) a vector index over it."""
        logging.info(f"Indexing webpage: {url}")
        loader_cls = download_loader("BeautifulSoupWebReader")
        loader = loader_cls()
        docs = loader.load_data(urls=[url])
        return VectorStoreIndex.from_documents(docs)

    @lru_cache(maxsize=32)
    @retry()
    def get_youtube_index(video_id: str) -> VectorStoreIndex:
        """Fetch a transcript and build (or reuse) a vector index over it."""
        logging.info(f"Indexing YouTube video: {video_id}")
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        text = ' '.join([t['text'] for t in transcript])
        doc = Document(text=text, doc_id=f"youtube_{video_id}")
        return VectorStoreIndex.from_documents([doc])

    # --- Specialized Tool Definitions ---

    # 1. Web Search Tools
    @tool
    @retry()
    def google_search(query: str) -> str:
        """Use for general knowledge questions, finding facts, or when you don't have a specific URL."""
        client = serpapi.Client(api_key=api_keys['serpapi'])
        results = client.search(q=query, engine="google")
        if organic_results := results.get('organic_results'):
            md = ["### Top Search Results"]
            for res in organic_results[:5]:
                md.append(f"- **{res.get('title', 'N/A')}**: {res.get('snippet', 'No snippet available.')}\n [Source]({res.get('link', '#')})")
            return "\n\n".join(md)
        return "No results found."

    @tool
    def query_webpage(url: str, query: str) -> str:
        """Use when you need to answer a specific question about the content of a single webpage URL."""
        try:
            index = get_webpage_index(url)
            return str(index.as_query_engine().query(query))
        except Exception as e:
            return f"Error querying webpage {url}: {e}"

    # 2. YouTube Tool
    @tool
    def query_youtube_video(video_url_or_id: str, query: str) -> str:
        """Use for questions about the content of a YouTube video. Accepts a full URL or a video ID."""
        try:
            video_id = video_url_or_id
            if "youtube.com" in video_url_or_id or "youtu.be" in video_url_or_id:
                parsed_url = urlparse.urlparse(video_url_or_id)
                # Standard links carry the id in ?v=...; youtu.be short links
                # carry it in the path instead.
                video_id = urlparse.parse_qs(parsed_url.query).get('v', [None])[0]
                if not video_id:
                    video_id = parsed_url.path.lstrip('/')
            if not video_id:
                return "Error: Could not extract a valid YouTube video ID."
            index = get_youtube_index(video_id)
            return str(index.as_query_engine().query(query))
        except YouTubeTranscriptApiError as e:
            return f"Error fetching transcript for video {video_id}: {e}"
        except Exception as e:
            return f"Error querying YouTube video {video_id}: {e}"

    # 3. Coding Tool
    @tool
    def run_python_code(code: str) -> str:
        """
        Executes a string of Python code and returns its standard output.
        Use this for coding challenges, calculations, or data manipulation.
        WARNING: the code runs with full interpreter privileges in this
        process; it is NOT sandboxed.
        """
        # SECURITY: exec() on LLM-generated text is arbitrary code execution.
        # An empty globals dict does not restrict builtins or file access;
        # harden with a real sandbox (subprocess + resource limits) before
        # exposing this publicly.
        output = io.StringIO()
        try:
            with contextlib.redirect_stdout(output):
                exec(code, {})
            return output.getvalue()
        except Exception as e:
            return f"Error executing code: {e}"

    # 4. Chess Tool
    @tool
    def get_chess_move(fen: str) -> str:
        """
        Finds the best chess move for a given board position in FEN format.
        Use this exclusively for chess-related questions.
        """
        # Path to stockfish can be set via env var for flexibility in HF Spaces
        stockfish_path = os.getenv("STOCKFISH_PATH", "/usr/games/stockfish")
        if not os.path.exists(stockfish_path):
            return f"Error: Stockfish engine not found at {stockfish_path}. Please set STOCKFISH_PATH environment variable."
        try:
            stockfish = Stockfish(path=stockfish_path)
            stockfish.set_fen_position(fen)
            best_move = stockfish.get_best_move()
            return best_move
        except Exception as e:
            return f"Error analyzing chess position: {e}"

    # --- Model and Agent Initialization ---

    try:
        model = InferenceClientModel(
            model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
            token=api_keys['together'],
            provider="together"
        )
        logging.info("Model loaded successfully.")
    except Exception as e:
        logging.error(f"Failed to load model: {e}")
        raise

    # A single, powerful worker agent with a diverse toolset.
    # WebSearchTool() is included because the manager's instructions tell the
    # model to prefer it for a first search pass.
    worker_agent = ToolCallingAgent(
        tools=[
            WebSearchTool(),
            google_search,
            query_webpage,
            query_youtube_video,
            run_python_code,
            get_chess_move,
        ],
        model=model,
        max_steps=5,  # Sub-tasks should be short and focused
        name="multi_tool_worker",
        description="A specialized worker agent that can search the web, query videos, execute code, and play chess."
    )

    # The manager agent acts as a strategic dispatcher; it holds no tools of
    # its own and delegates everything to the worker.
    manager = CodeAgent(
        tools=[],
        model=model,
        managed_agents=[worker_agent],
        instructions="""
You are a master AI assistant responsible for answering a user's question. Your goal is to provide a single, precise, and final answer.

**Your Strategic Thought Process for GAIA Tasks:**

1. **ANALYZE THE QUESTION (Deep Understanding):**
    * Carefully read and dissect the user's question. Identify all constraints, keywords, and the exact format required for the final answer (e.g., number, string, comma-separated list, specific units).
    * Determine the core task: Is it a factual lookup, data extraction, code execution, video analysis, or a chess problem?

2. **FORMULATE A DETAILED PLAN (Multi-step if needed):**
    * Based on your analysis, outline a step-by-step strategy. For complex questions (Level 2/3 GAIA), this plan might involve multiple tool calls and intermediate reasoning steps.
    * **Prioritize Tools:**
        * For general web searches or initial broad information gathering, prefer `WebSearchTool()`. It's often quicker for a first pass.
        * If `WebSearchTool()` doesn't yield precise results, or if you need structured data (e.g., AI overviews, specific facts from search results), use `google_search` (SerpApi).
        * For extracting specific information from a known webpage URL, use `query_webpage`.
        * For questions about YouTube video content, use `query_youtube_video`.
        * For computational tasks or code generation, use `run_python_code`.
        * For chess problems, use `get_chess_move`.
    * Consider potential pitfalls and how to recover (e.g., if a search yields no results, try a different query).

3. **EXECUTE AND ITERATE (Tool Delegation & Synthesis):**
    * Delegate tasks to the `multi_tool_worker` agent, providing the exact tool and parameters.
    * Carefully evaluate the output from each tool call.
    * If the output is not sufficient, refine your query or try a different tool/approach. This is where iterative refinement and self-correction are crucial.
    * Synthesize information from multiple sources if necessary to build the complete answer.

4. **FORMULATE THE FINAL ANSWER (Precision & Format):**
    * Once you have definitively found the answer, format it *exactly* as requested in the original question.
    * **DO NOT** add any extra text, explanations, or conversational filler. The final answer must be *only* the answer itself.
    * Example: If the question asks for a number and the answer is "123", your output should be `FINAL ANSWER: 123`. If it asks for a string "New York", your output should be `FINAL ANSWER: New York`. If it asks for a comma-separated list "apple,banana", your output should be `FINAL ANSWER: apple,banana`.
"""
    )
    logging.info("Multi-task agent initialized successfully.")
    return manager
243
+
244
+ # --- Main Execution Block for Local Testing ---
245
+
246
def main():
    """Smoke-test the agent locally, one prompt per tool category."""
    configure_logging()
    try:
        # The retry decorator resolves SerpApiClientException as a global;
        # guard the import with a placeholder class so a missing or renamed
        # serpapi package cannot abort the whole test run with ImportError.
        global SerpApiClientException
        try:
            from serpapi.client import SerpApiClientException
        except ImportError:
            class SerpApiClientException(Exception):
                """Placeholder so the retry decorator's except-tuple resolves."""
            logging.warning("serpapi.client.SerpApiClientException unavailable; using placeholder.")

        agent = initialize_agent()
        if agent:
            # Example prompts for each category
            prompts = {
                "Web Search": "Who is the current CEO of OpenAI?",
                "YouTube": "What is the main topic of the video https://www.youtube.com/watch?v=bZQun8Y4L2A regarding AI models?",
                "Coding": "Write a Python script that calculates and prints the factorial of 5.",
                "Chess": "What is the best move for the starting chess position? The FEN is 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1'."
            }

            for category, prompt in prompts.items():
                logging.info(f"\n--- Testing Category: {category} ---")
                logging.info(f"Prompt: {prompt}")
                response = agent.run(prompt)
                logging.info(f"Agent's Final Answer: {response}")
                logging.info("-" * (30 + len(category)))

    except Exception as e:
        logging.critical(f"An unhandled error occurred during local testing: {e}", exc_info=True)
272
+
273
+ if __name__ == "__main__":
274
+ # This allows you to test the agent's logic by running `python agent.py` locally.
275
+ main()