File size: 10,055 Bytes
0b88bc1
 
 
a14137a
 
 
0b88bc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a14137a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.document_loaders import YoutubeLoader
from langchain_community.tools import TavilySearchResults

import json
import sys
import logging

# Root logging is configured once at import time: INFO and above, each record
# prefixed with a timestamp and its level. `logger` is this module's logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

@tool
def add(values: list[int]) -> float:
    """
    Sum every number in a list
    Args:
        values: The numbers to add up
    Returns:
        The total of all the numbers (0 for an empty list)
    """
    logger.info(f"Adding numbers: {values}")
    total = sum(values)
    return total

@tool
def subtract(a: int, b: int) -> int:
    """
    Compute the difference of two numbers
    Args:
        a: The number to subtract from
        b: The number to take away
    Returns:
        a minus b
    """
    logger.info(f"Subtracting {a} - {b}")
    difference = a - b
    return difference

@tool
def multiply(a: int, b: int) -> int:
    """
    Compute the product of two numbers
    Args:
        a: The first factor
        b: The second factor
    Returns:
        a times b
    """
    logger.info(f"Multiplying {a} * {b}")
    product = a * b
    return product

@tool
def divide(a: int, b: int) -> float:
    """
    Compute the quotient of two numbers (true division)
    Args:
        a: The dividend
        b: The divisor
    Returns:
        a divided by b
    Raises:
        ZeroDivisionError: if b is 0
    """
    logger.info(f"Dividing {a} / {b}")
    quotient = a / b
    return quotient

@tool
def modulo(a: int, b: int) -> int:
    """
    Compute the remainder of dividing one number by another
    Args:
        a: The dividend
        b: The divisor
    Returns:
        The remainder of a divided by b
    Raises:
        ZeroDivisionError: if b is 0
    """
    logger.info(f"Calculating modulo of {a} % {b}")
    remainder = a % b
    return remainder

@tool
def wikipedia_search(query: str) -> str:
    """
    Look up a topic on Wikipedia
    Args:
        query: The query to search for
    Returns:
        Up to three matching articles, each listed with its source, title and
        text between numbered DOCUMENT START/END markers
    """
    logger.info(f"Searching Wikipedia for: {query}")
    articles = WikipediaLoader(query=query, load_max_docs=3).load()

    sections = []
    for number, article in enumerate(articles, start=1):
        source = article.metadata.get('source', 'Unknown source')
        title = article.metadata.get('title', 'Untitled')

        # The whole loaded text is kept (no paragraph trimming); blank text
        # is replaced by a placeholder.
        body = article.page_content.strip() or "No content available"

        sections.append(
            "\n".join(
                (
                    f"--- DOCUMENT {number} START ---",
                    f"Source: {source}",
                    f"Title: {title}",
                    f"Content: {body}",
                    f"--- DOCUMENT {number} END ---",
                )
            )
        )

    return "\n\n".join(sections)

def _first_metadata_value(metadata, keys, default):
    """Return the first non-empty value stored under any of *keys*, else *default*."""
    for key in keys:
        value = metadata.get(key)
        if value:
            return value
    return default


@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for research papers
    Args:
        query: The query to search for
    Returns:
        Up to three papers, each listed with its source, title and abstract
        between numbered DOCUMENT START/END markers
    """
    logger.info(f"Searching ArXiv for: {query}")
    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_results = []

    for i, doc in enumerate(docs_found, 1):
        metadata = doc.metadata

        # ArxivLoader capitalises its metadata keys ("Title", "Summary", ...),
        # so the lowercase lookups alone always fell back to the placeholders.
        # The lowercase keys are still tried first for loaders that provide them.
        source = _first_metadata_value(
            metadata, ('source', 'entry_id', 'Entry ID'), 'Unknown source'
        )
        title = _first_metadata_value(metadata, ('title', 'Title'), 'Untitled')

        # Prefer the paper summary so the "Abstract:" field matches its label;
        # page_content (the extracted paper text) is only the fallback.
        summary = str(_first_metadata_value(metadata, ('Summary', 'summary'), '')).strip()
        abstract = summary or (doc.page_content or '').strip() or "No abstract available"

        formatted_results.append(
            "\n".join(
                (
                    f"--- DOCUMENT {i} START ---",
                    f"Source: {source}",
                    f"Title: {title}",
                    f"Abstract: {abstract}",
                    f"--- DOCUMENT {i} END ---",
                )
            )
        )

    return "\n\n".join(formatted_results)

@tool
def web_search(query: str) -> str:
    """
    Retrieve information from the web by loading the given page(s)
    Args:
        query: The URL to load (a single URL starting with "http", or a list of URLs)
    Returns:
        The loaded pages, each listed with its source, title and text between
        numbered DOCUMENT START/END markers, or a message explaining why
        nothing could be loaded
    """
    logger.info(f"Searching the web for: {query}")
    # WebBaseLoader fetches pages by URL; it does not perform a keyword search,
    # so anything that is not a URL yields the "No valid URLs" message.
    try:
        if not isinstance(query, str):
            targets = query
        elif query.startswith('http'):
            targets = [query]
        else:
            targets = []

        if not targets:
            return "No valid URLs provided for web search."

        # At most two pages are fetched per call.
        pages = WebBaseLoader(targets[:2]).load()

        sections = []
        for number, page in enumerate(pages, start=1):
            source = page.metadata.get('source', 'Unknown source')
            title = page.metadata.get('title', 'Untitled')

            # The full page text is returned (no truncation); blank text is
            # replaced by a placeholder.
            body = page.page_content.strip() or "No content available"

            sections.append(
                "\n".join(
                    (
                        f"--- DOCUMENT {number} START ---",
                        f"Source: {source}",
                        f"Title: {title}",
                        f"Content: {body}",
                        f"--- DOCUMENT {number} END ---",
                    )
                )
            )

        return "\n\n".join(sections)

    except Exception as e:
        # Best effort: failures are reported as text instead of being raised.
        return f"Error during web search: {str(e)}"

def _youtube_video_id(url: str) -> str:
    """Extract the video id from a YouTube watch, youtu.be, shorts, embed or live URL."""
    from urllib.parse import parse_qs, urlparse

    # Tolerate surrounding whitespace/quotes and a missing scheme.
    cleaned = url.strip().strip("\"'")
    parsed = urlparse(cleaned if "://" in cleaned else f"https://{cleaned}")
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    if host == "youtu.be":
        video_id = segments[0] if segments else ""
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments[:1] == ["watch"]:
            # Only the "v" parameter is the id; "&t=", "&list=" etc. are ignored.
            video_id = parse_qs(parsed.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            video_id = segments[1]

    if not video_id:
        raise ValueError(f"Could not extract a YouTube video id from: {url}")
    return video_id


@tool
def youtube_transcript(url: str) -> dict:
    """
    Get transcript of YouTube video.
    Args:
        url: YouTube video url (watch, youtu.be, shorts, embed or live link)
    Returns:
        A dict whose "youtube_transcript" entry is the transcript text
    Raises:
        ValueError: if no video id can be found in the url
    """
    logger.info(f"Getting transcript of YouTube video: {url}")
    video_id = _youtube_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    transcript_text = " ".join(item["text"] for item in transcript)
    return {"youtube_transcript": transcript_text}

@tool
def python_interpreter(code: str) -> str:
    """
    Execute Python code and return the result.

    Only a fixed set of builtins is available to the code, and only modules
    from a small allow-list (math, random, datetime, json, collections,
    itertools, functools, re, statistics) can be imported. Whatever the code
    prints is captured and returned.
    Args:
        code: Python code to execute
    Returns:
        The output of the executed code or error message
    """
    import ast
    import builtins
    from contextlib import redirect_stdout
    from io import StringIO

    allowed_modules = frozenset({
        'math', 'random', 'datetime', 'json', 'collections',
        'itertools', 'functools', 're', 'statistics',
    })
    allowed_builtins = (
        'print', 'len', 'str', 'int', 'float', 'list', 'dict', 'tuple', 'set',
        'range', 'enumerate', 'zip', 'map', 'filter', 'sum', 'max', 'min',
        'abs', 'round', 'sorted', 'reversed', 'any', 'all', 'isinstance',
        'type', 'hasattr', 'getattr', 'setattr', 'dir', 'help',
    )
    # Calls that are refused up front, mapped to the label shown in the error.
    blocked_calls = {
        'exec': 'exec',
        'eval': 'eval',
        '__import__': '__import__',
        'open': 'open(',
        'file': 'file(',
    }

    def guarded_import(name, module_globals=None, module_locals=None, fromlist=(), level=0):
        # Backs the `import` statement inside the executed code: only
        # absolute imports of allow-listed top-level modules go through.
        if level != 0 or name.partition('.')[0] not in allowed_modules:
            raise ImportError(f"import of '{name}' is not allowed")
        return builtins.__import__(name, module_globals, module_locals, fromlist, level)

    def find_blocked_operation(tree):
        # Inspect the syntax tree rather than the raw text, so identifiers
        # such as "evaluate" are not mistaken for eval and an allowed module
        # name elsewhere in the code cannot whitelist a forbidden import.
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                requested = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                requested = ['.' * node.level + (node.module or '')]
            else:
                requested = []
            for module_name in requested:
                if module_name.partition('.')[0] not in allowed_modules:
                    return f"import {module_name}"
            if (
                isinstance(node, ast.Call)
                and isinstance(node.func, ast.Name)
                and node.func.id in blocked_calls
            ):
                return blocked_calls[node.func.id]
        return None

    try:
        tree = ast.parse(code, filename='<string>')

        blocked = find_blocked_operation(tree)
        if blocked is not None:
            return f"Error: Potentially dangerous operation detected: {blocked}"

        safe_builtins = {
            name: getattr(builtins, name)
            for name in allowed_builtins
            if hasattr(builtins, name)
        }
        safe_builtins['__import__'] = guarded_import
        # Needed by the `class` statement.
        safe_builtins['__build_class__'] = builtins.__build_class__

        # One dict serves as both globals and locals, so functions,
        # comprehensions and classes defined by the code can see its
        # top-level names and the permitted builtins.
        namespace = {'__builtins__': safe_builtins, '__name__': '__main__'}

        captured_output = StringIO()
        # NOTE(security): exec on caller-supplied code. The checks above are a
        # best-effort filter, not a sandbox; run untrusted code in an isolated
        # process or container.
        with redirect_stdout(captured_output):
            exec(compile(tree, '<string>', 'exec'), namespace)

        output = captured_output.getvalue()
        return output if output else "Code executed successfully (no output)"

    except Exception as e:
        # redirect_stdout has already restored sys.stdout at this point.
        return f"Error executing code: {str(e)}"
        
@tool
def web_search_duckduckgo(query: str) -> dict:
    """Search DuckDuckGo for a query and return maximum 3 results.
    Args:
        query: The search query.
    Returns:
        A dict whose "web_results" entry holds the result texts, each wrapped
        in <Document> tags and separated by "---" lines."""
    search_docs = DuckDuckGoSearchResults(output_format="list", max_results=3).invoke(input=query)
    # DuckDuckGo result dicts carry their text under "snippet" (next to
    # "title" and "link"); reading only "content" left every document empty.
    # "content" is kept as a fallback.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document>\n{doc.get("snippet") or doc.get("content", "")}\n</Document>'
        for doc in search_docs
    )
    return {"web_results": formatted_search_docs}

@tool
def youtube_loader(youtube_url: str) -> dict:
    """Load a YouTube video and return its transcript.
    Args:
        youtube_url: The YouTube video URL.
    Returns:
        A dict whose "youtube_transcript" entry is the result of the loader's load() call."""
    # add_video_info=True asks the loader to include video details as well.
    documents = YoutubeLoader.from_youtube_url(youtube_url, add_video_info=True).load()
    return {"youtube_transcript": documents}

@tool
def web_search_tavily(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.
    Args:
        query: The search query.
    Returns:
        A dict whose "web_results" entry holds the result texts, each wrapped
        in <Document> tags and separated by "---" lines."""
    search_docs = TavilySearchResults(max_results=3).invoke({'query': query})

    # NOTE(review): the Tavily tool appears to hand back a plain string (an
    # error description) instead of a list when the request fails. Pass that
    # through rather than crashing on `.get` below.
    if not isinstance(search_docs, list):
        return {"web_results": str(search_docs)}

    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document>\n{doc.get("content", "")}\n</Document>'
        for doc in search_docs
    )
    return {"web_results": formatted_search_docs}