File size: 9,239 Bytes
539dfc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
"""
OpenSearchAgent - A web search agent for answering questions using the internet

This module implements a powerful agent that can search the web, navigate pages,
and analyze content to answer complex questions. It uses the smolagents library
to create a hierarchical agent system with a manager agent and a web browser agent.

The agent can:
- Search the web using Google (via SerpAPI)
- Visit and navigate web pages
- Find and analyze text content
- Process PDF files and other document formats
- Visualize content when needed

Environment variables required:
- SERPAPI_API_KEY: API key for SerpAPI (for web search)
- OPENAI_API_KEY: API key for OpenAI (for the language model)
- HF_TOKEN: Hugging Face token (for accessing HF resources)
"""

import os
import threading

from dotenv import load_dotenv
from huggingface_hub import login
from scripts.text_inspector_tool import TextInspectorTool
from scripts.text_web_browser import (
    ArchiveSearchTool,
    FinderTool,
    FindNextTool,
    PageDownTool,
    PageUpTool,
    SimpleTextBrowser,
    VisitTool,
)
from scripts.visual_qa import visualizer

from smolagents import (
    CodeAgent,
    GoogleSearchTool,
    # InferenceClientModel,  # Uncomment if you want to use InferenceClientModel
    LiteLLMModel,
    ToolCallingAgent,
    OpenAIServerModel,
)


# Read settings from a .env file (override=True lets .env values replace
# variables already present in the environment), then log in to Hugging Face
load_dotenv(override=True)
login(os.environ.get("HF_TOKEN"))

# Role-name mapping handed to the language model wrapper
custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"}

# Desktop-browser User-Agent sent with every web request so sites are less
# likely to reject the text browser
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"

# Keyword arguments used to construct the SimpleTextBrowser
BROWSER_CONFIG = {
    # Large viewport so each page view captures more content
    "viewport_size": 5 * 1024,
    # Directory (relative to the working directory) for downloaded files
    "downloads_folder": "downloads_folder",
    "request_kwargs": {
        "headers": {"User-Agent": user_agent},
        # Generous timeout for slow websites
        "timeout": 300,
    },
    # SerpAPI key for web search; None when the variable is not set
    "serpapi_key": os.environ.get("SERPAPI_API_KEY"),
}

# Make sure the downloads directory exists before any tool writes to it
os.makedirs(f"./{BROWSER_CONFIG['downloads_folder']}", exist_ok=True)


class OpenSearchAgent:
    """
    A portable agent that can search the web and answer questions.

    The instance wraps a manager ``CodeAgent`` which delegates web browsing to
    a ``ToolCallingAgent`` named ``search_agent``. Calling the instance with a
    question runs the manager agent and returns its answer as a string.

    Attributes:
        model_id (str): Identifier of the language model driving both agents.
        agent (CodeAgent): The configured manager agent.
    """

    def __init__(self, model_id="o1"):
        """
        Initialize the OpenSearchAgent with the specified model.

        Args:
            model_id (str): The model ID to use for the agent. Default is "o1".
                           The ID is passed to ``OpenAIServerModel``, so it
                           should name a model served by the configured
                           OpenAI endpoint (for example "gpt-4o").
        """
        self.model_id = model_id
        self.agent = self._create_agent()

    def _build_model(self):
        """
        Build the language model shared by the manager and browser agents.

        The model is created from ``self.model_id`` so that the ID given to
        the constructor is the one actually used (and the one printed).

        Returns:
            OpenAIServerModel: The model wrapper for ``self.model_id``.
        """
        model_params = {
            "model_id": self.model_id,
            "custom_role_conversions": custom_role_conversions,
            "max_completion_tokens": 8192,
        }
        # Only the "o1" model gets an explicit reasoning effort setting
        if self.model_id == "o1":
            model_params["reasoning_effort"] = "high"
        print(f"Using model parameters: {model_params}")

        # You can switch between different model providers here
        # model = LiteLLMModel(**model_params)  # For using LiteLLM
        return OpenAIServerModel(**model_params)  # For using OpenAI directly

    def _create_agent(self):
        """
        Create and configure the agent with the appropriate tools and models.

        This is where you can customize the agent by adding new tools or
        changing the configuration of existing ones.

        Returns:
            CodeAgent: The configured agent ready to answer questions.
        """
        model = self._build_model()

        # Configure text browser and tools
        text_limit = 100000  # Maximum text length to process
        browser = SimpleTextBrowser(**BROWSER_CONFIG)

        # ===== TOOL CONFIGURATION =====
        # This is where you can add new tools to enhance the agent's capabilities
        web_tools = [
            GoogleSearchTool(provider="serpapi"),  # Web search tool
            VisitTool(browser),                    # Visit URLs
            PageUpTool(browser),                   # Navigate up in a page
            PageDownTool(browser),                 # Navigate down in a page
            FinderTool(browser),                   # Find text in a page
            FindNextTool(browser),                 # Find next occurrence of text
            ArchiveSearchTool(browser),            # Search web archives
            TextInspectorTool(model, text_limit),  # Analyze text content

            # ===== ADD YOUR CUSTOM TOOLS HERE =====
            # Example:
            # CustomTool(),  # Your custom tool implementation
            # ImageAnalysisTool(),  # Tool for analyzing images
            # DataExtractionTool(),  # Tool for extracting structured data
        ]

        # Create the web browser agent that handles web interactions
        text_webbrowser_agent = ToolCallingAgent(
            model=model,
            tools=web_tools,
            max_steps=20,                # Maximum steps before stopping
            verbosity_level=2,           # Level of logging detail
            planning_interval=4,         # How often to re-plan
            name="search_agent",
            description="""A team member that will search the internet to answer your question.
        Ask him for all your questions that require browsing the web.
        Provide him as much context as possible, in particular if you need to search on a specific timeframe!
        And don't hesitate to provide him with a complex search task, like finding a difference between two webpages.
        Your request must be a real sentence, not a google search! Like "Find me this information (...)" rather than a few keywords.
        """,
            provide_run_summary=True,    # Provide summary of actions taken
        )

        # Add additional instructions to the web browser agent
        text_webbrowser_agent.prompt_templates["managed_agent"]["task"] += """You can navigate to .txt online files.
        If a non-html page is in another format, especially .pdf or a Youtube video, use tool 'inspect_file_as_text' to inspect it.
        Additionally, if after some searching you find out that you need more information to answer the question, you can use `final_answer` with your request for clarification as argument to request for more information."""

        # ===== MANAGER AGENT CONFIGURATION =====
        # Create the manager agent that oversees the web browser agent
        # You can add more managed agents here for different specialized tasks
        manager_agent = CodeAgent(
            model=model,
            tools=[
                visualizer,                      # Tool for visualization tasks
                TextInspectorTool(model, text_limit),  # Text analysis tool

                # ===== ADD YOUR CUSTOM MANAGER TOOLS HERE =====
                # Example:
                # DataAnalysisTool(),  # Tool for analyzing data
                # ReportGeneratorTool(),  # Tool for generating reports
            ],
            max_steps=12,                # Maximum steps before stopping
            verbosity_level=2,           # Level of logging detail
            # NOTE(review): "*" allows agent-generated code to import any
            # module; consider an explicit allow-list if questions can come
            # from untrusted users.
            additional_authorized_imports=["*"],  # Allow all imports
            planning_interval=4,         # How often to re-plan
            managed_agents=[
                text_webbrowser_agent,   # The web browser agent

                # ===== ADD YOUR CUSTOM MANAGED AGENTS HERE =====
                # Example:
                # data_analysis_agent,  # An agent specialized in data analysis
                # image_processing_agent,  # An agent specialized in image processing
            ],
        )

        return manager_agent

    def __call__(self, question: str) -> str:
        """
        Run the agent on the given question.

        Args:
            question (str): The question to answer.

        Returns:
            str: The agent's answer to the question, converted with ``str()``.
        """
        # Only a short prefix of the question is echoed to keep the log compact
        print(f"OpenSearchAgent received question: {question[:50]}...")
        answer = self.agent.run(question)

        # The agent may return a non-string value; convert it so it can be
        # sliced for the log line and returned with a consistent type
        answer_str = str(answer)
        print(f"OpenSearchAgent found answer: {answer_str[:100]}...")

        return answer_str


def main():
    """
    Demonstrate the OpenSearchAgent on a sample question.

    Builds an agent, asks it one hard-coded question and prints the answer.
    Edit the question text or the model_id below to try other configurations.
    """
    # Sample question; replace with your own
    sample_question = "How many studio albums did Mercedes Sosa release before 2007?"

    # Build the agent, then call it like a function to get the answer
    search_agent = OpenSearchAgent(model_id="o1")
    result = search_agent(sample_question)

    print(f"Got this answer: {result}")


# Run the example only when this file is executed as a script, not on import
if __name__ == "__main__":
    main()