File size: 3,636 Bytes
d4598ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3640e57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader

@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for information
    Args:
        query: The query to search for
    Returns:
        The search results (first paragraph of up to 5 matching articles),
        or an explicit no-results message when nothing matched
    """
    docs_found = WikipediaLoader(query=query, load_max_docs=5).load()

    # An empty string gives the consuming agent no signal that the search
    # actually ran; return an explicit message instead.
    if not docs_found:
        return f"No Wikipedia results found for query: {query!r}"

    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')

        # Keep just the first paragraph (split on blank line) to bound
        # the amount of text fed back into the prompt.
        content = doc.page_content.strip()
        first_paragraph = content.split('\n\n')[0] if content else "No content available"

        formatted_results.append(f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_paragraph}
--- DOCUMENT {i} END ---""")

    return "\n\n".join(formatted_results)

@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for research papers
    Args:
        query: The query to search for
    Returns:
        The search results with abstracts (up to 3 papers), or an explicit
        no-results message when nothing matched
    """
    docs_found = ArxivLoader(query=query, load_max_docs=3).load()

    # An empty string gives the consuming agent no signal that the search
    # actually ran; return an explicit message instead.
    if not docs_found:
        return f"No ArXiv results found for query: {query!r}"

    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')

        # For ArXiv, the abstract is typically the page_content
        abstract = doc.page_content.strip() if doc.page_content else "No abstract available"

        formatted_results.append(f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Abstract: {abstract}
--- DOCUMENT {i} END ---""")

    return "\n\n".join(formatted_results)

@tool
def web_search(query: str) -> str:
    """
    Search the web for information
    Args:
        query: The query to search for (a single URL, a string of
            whitespace-separated URLs, or a list of URLs)
    Returns:
        The search results with first 1000 characters of each page
    """
    # Note: WebBaseLoader requires URLs, so this assumes query contains URLs
    # For a more general web search, you'd need a different approach like SerpAPI
    try:
        if isinstance(query, str):
            # Accept one URL or several whitespace-separated URLs in one
            # string; anything not starting with http(s) is dropped.
            urls = [token for token in query.split() if token.startswith('http')]
        else:
            urls = list(query)

        if not urls:
            return "No valid URLs provided for web search."

        # Limit to 4 URLs maximum to bound fetch time and output size
        urls = urls[:4]
        docs_found = WebBaseLoader(urls).load()

        # Explicit message instead of an empty string when nothing loaded
        if not docs_found:
            return "No content could be loaded from the provided URLs."

        formatted_results = []
        for i, doc in enumerate(docs_found, 1):
            source = doc.metadata.get('source', 'Unknown source')
            title = doc.metadata.get('title', 'Untitled')

            # Truncate to the first 1000 characters, marking the cut
            content = doc.page_content.strip()
            first_1000_chars = content[:1000] if content else "No content available"
            if len(content) > 1000:
                first_1000_chars += "..."

            formatted_results.append(f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_1000_chars}
--- DOCUMENT {i} END ---""")

        return "\n\n".join(formatted_results)

    except Exception as e:
        # Deliberate best-effort boundary: report the failure as tool
        # output rather than crashing the agent loop.
        return f"Error during web search: {str(e)}"