# Spaces: Running
import os | |
import time | |
from smolagents import ( | |
CodeAgent, | |
LiteLLMModel, | |
VisitWebpageTool, | |
GoogleSearchTool, | |
MCPClient, | |
OpenAIServerModel | |
) | |
# Firecrawl API key read once at import time; None when the variable is unset.
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY")
def get_anthropic_model():
    """Build the LiteLLM model wrapper for ``claude-opus-4-20250514``.

    The API key is looked up in the ``ANTHROPIC_API_KEY`` environment
    variable with ``os.getenv``, so ``api_key`` is ``None`` when the
    variable is not set.

    Returns:
        The configured ``LiteLLMModel`` instance.
    """
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    return LiteLLMModel(
        model_id="claude-opus-4-20250514",
        api_key=anthropic_key,
    )
def get_openai_model():
    """Build the OpenAI server model wrapper for ``gpt-4o``.

    The API key is read with ``os.environ[...]``, so a missing
    ``OPENAI_API_KEY`` variable raises ``KeyError`` here rather than
    producing a model without credentials.

    Returns:
        The configured ``OpenAIServerModel`` instance.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    settings = {
        "model_id": "gpt-4o",
        "api_base": "https://api.openai.com/v1",
        "api_key": os.environ["OPENAI_API_KEY"],
    }
    return OpenAIServerModel(**settings)
def get_firecrawl_tools(max_retries=1, timeout=5):
    """Return the Firecrawl MCP tools, or an empty list if they cannot be loaded.

    This is a best-effort loader: every failure path yields ``[]`` so that
    callers can unpack the result unconditionally.

    Args:
        max_retries: Total number of connection attempts. With the default
            of 1 a single attempt is made and nothing is retried. Values
            below 1 make no attempt at all and return ``[]``.
        timeout: Value passed through as the ``"timeout"`` entry of the MCP
            server parameters (presumably seconds -- confirm against the
            MCP client documentation).

    Returns:
        The list produced by ``MCPClient.get_tools()`` on success, otherwise
        an empty list.
    """
    # Without this guard range() would be empty and the function would fall
    # off the end, returning None instead of a list.
    if max_retries < 1:
        return []

    # Avoid building a URL that contains the literal text "None".
    if not FIRECRAWL_API_KEY:
        print("FIRECRAWL_API_KEY is not set; skipping Firecrawl tools")
        return []

    sse_url = f"https://mcp.firecrawl.dev/{FIRECRAWL_API_KEY}/sse"

    for attempt in range(1, max_retries + 1):
        try:
            firecrawl_mcp_client = MCPClient(
                server_parameters={
                    "url": sse_url,
                    "timeout": timeout,
                }
            )
            return firecrawl_mcp_client.get_tools()
        except Exception as e:  # deliberate best-effort boundary
            if attempt == max_retries:
                # NOTE(review): the API key is part of the URL, so the
                # exception text may contain it -- confirm before this
                # output is shipped to shared logs.
                print(f"Failed to initialize Firecrawl tools after {max_retries} attempts: {str(e)}")
                return []
            time.sleep(1)  # Wait before retrying

    return []
def get_agent(model):
    """Assemble the ``LLMFullTextGenerator`` code agent.

    The agent receives a webpage-visiting tool, a Google search tool, the
    tools exposed by the sitemap-generator MCP server and whatever
    ``get_firecrawl_tools()`` returns.

    Args:
        model: The model object handed to ``CodeAgent``.

    Returns:
        The configured ``CodeAgent`` instance.
    """
    # Prompt text shown to the agent; kept verbatim.
    description = """
You are a helpful assistant that can generate a full llm txt file from a website.
You can use the following tools to help you:
- VisitWebpageTool: to visit a website
- GoogleSearchTool: to search the web
- SitemapTools: to generate a sitemap for a website
- FirecrawlTools: to crawl a website, always use a timeout of 15000 milliseconds.
For FirecrawlTools, use the parameters of the following shape:
```
{
"name": "firecrawl_scrape",
"arguments": {
"url": "https://example.com",
"formats": ["markdown"],
"onlyMainContent": true,
"waitFor": 1000,
"timeout": 15000, // milliseconds
"mobile": false,
"includeTags": ["article", "main"],
"excludeTags": ["nav", "footer"],
"skipTlsVerification": false
}
}
```
You also have access to an tool to generate a sitemap for a website.
You can use the sitemap to have a better understanding of the website structure.
You will be given a website url and you will need to generate a full llm txt file from the website.
You can search for llm txt files on the web using the GoogleSearchTool with the following query: "site:docs.* llm-full.txt".
"""

    # Remote tool sets are fetched first, in the same order as before:
    # sitemap server, then Firecrawl.
    sitemap_client = MCPClient(
        server_parameters={
            "url": "https://a17o-sitemap-generator-mcp.hf.space/gradio_api/mcp/sse",
            "timeout": 30,
        }
    )
    remote_sitemap_tools = sitemap_client.get_tools()
    remote_firecrawl_tools = get_firecrawl_tools()

    # Built-in tools come first, followed by the remote ones.
    toolbox = [VisitWebpageTool(), GoogleSearchTool()]
    toolbox += remote_sitemap_tools
    toolbox += remote_firecrawl_tools

    return CodeAgent(
        model=model,
        name="LLMFullTextGenerator",
        description=description,
        tools=toolbox,
    )