Spaces:
Running
Running
File size: 3,194 Bytes
bd70f31 70a30ed 3213b51 bd70f31 3213b51 bd70f31 3213b51 bd70f31 3213b51 bd70f31 70a30ed bd70f31 3213b51 70a30ed 3213b51 70a30ed 3213b51 bd70f31 3213b51 bd70f31 3213b51 70a30ed 3213b51 bd70f31 3213b51 bd70f31 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import os
import time
from smolagents import (
CodeAgent,
LiteLLMModel,
VisitWebpageTool,
GoogleSearchTool,
MCPClient,
OpenAIServerModel
)
# Firecrawl credential, read once at import time; None when the variable is unset.
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY")
def get_anthropic_model():
    """Build a ``LiteLLMModel`` for the ``claude-opus-4-20250514`` model id.

    The API key comes from the ``ANTHROPIC_API_KEY`` environment variable via
    ``os.getenv``, so an unset variable is forwarded as ``None`` rather than
    raising here.
    """
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    return LiteLLMModel(
        model_id="claude-opus-4-20250514",
        api_key=anthropic_key,
    )
def get_openai_model():
    """Build an ``OpenAIServerModel`` for ``gpt-4o`` against api.openai.com.

    Raises:
        KeyError: if ``OPENAI_API_KEY`` is missing from the environment
            (the key is read with ``os.environ[...]``, not ``os.getenv``).
    """
    return OpenAIServerModel(
        model_id="gpt-4o",
        api_base="https://api.openai.com/v1",
        api_key=os.environ["OPENAI_API_KEY"],
    )
def get_firecrawl_tools(max_retries=1, timeout=5):
    """Load the Firecrawl MCP tools, retrying on failure.

    This is best-effort: failures are reported with ``print`` and never
    raised, so the result can always be unpacked into a tool list.

    Args:
        max_retries: Maximum number of connection attempts. For values below
            1 no attempt is made and an empty list is returned.
        timeout: Value sent as the ``"timeout"`` entry of the MCP server
            parameters (presumably seconds -- confirm against the smolagents
            ``MCPClient`` documentation).

    Returns:
        Whatever ``MCPClient.get_tools()`` returns on success, otherwise an
        empty list.
    """
    # Without this guard the retry loop never runs for max_retries < 1 and
    # the function would fall through and return None instead of a list.
    if max_retries < 1:
        return []

    # Skip the network round trip when no key is configured; the URL below
    # would otherwise be built with the literal text "None".
    if not FIRECRAWL_API_KEY:
        print("Failed to initialize Firecrawl tools: FIRECRAWL_API_KEY is not set")
        return []

    # NOTE(review): the API key is embedded in the URL, so the exception text
    # printed on failure may contain it -- confirm before exposing these logs.
    server_parameters = {
        "url": f"https://mcp.firecrawl.dev/{FIRECRAWL_API_KEY}/sse",
        "timeout": timeout,
    }

    last_error = None
    for attempt in range(max_retries):
        if attempt:
            time.sleep(1)  # Wait before retrying
        try:
            return MCPClient(server_parameters=server_parameters).get_tools()
        except Exception as e:  # deliberate catch-all: Firecrawl is optional
            last_error = e

    print(f"Failed to initialize Firecrawl tools after {max_retries} attempts: {str(last_error)}")
    return []
def get_agent(model):
    """Assemble the ``LLMFullTextGenerator`` ``CodeAgent`` around *model*.

    The tool list is, in order: a ``VisitWebpageTool``, a ``GoogleSearchTool``,
    the tools served by the sitemap-generator MCP endpoint, and whatever
    ``get_firecrawl_tools()`` returns. Errors raised while connecting to the
    sitemap endpoint are not caught here.
    """
    sitemap_client = MCPClient(
        server_parameters={
            "url": "https://a17o-sitemap-generator-mcp.hf.space/gradio_api/mcp/sse",
            "timeout": 30,
        }
    )
    sitemap_toolset = sitemap_client.get_tools()
    firecrawl_toolset = get_firecrawl_tools()

    # Built-in tools first, then the MCP-provided ones in the same order as before.
    toolbox = [VisitWebpageTool(), GoogleSearchTool()]
    toolbox.extend(sitemap_toolset)
    toolbox.extend(firecrawl_toolset)

    # The text below is passed to the agent verbatim; its lines start at
    # column 0 because they are part of the string.
    agent_description = """
You are a helpful assistant that can generate a full llm txt file from a website.
You can use the following tools to help you:
- VisitWebpageTool: to visit a website
- GoogleSearchTool: to search the web
- SitemapTools: to generate a sitemap for a website
- FirecrawlTools: to crawl a website, always use a timeout of 15000 milliseconds.
For FirecrawlTools, use the parameters of the following shape:
```
{
"name": "firecrawl_scrape",
"arguments": {
"url": "https://example.com",
"formats": ["markdown"],
"onlyMainContent": true,
"waitFor": 1000,
"timeout": 15000, // milliseconds
"mobile": false,
"includeTags": ["article", "main"],
"excludeTags": ["nav", "footer"],
"skipTlsVerification": false
}
}
```
You also have access to an tool to generate a sitemap for a website.
You can use the sitemap to have a better understanding of the website structure.
You will be given a website url and you will need to generate a full llm txt file from the website.
You can search for llm txt files on the web using the GoogleSearchTool with the following query: "site:docs.* llm-full.txt".
"""
    return CodeAgent(
        model=model,
        name="LLMFullTextGenerator",
        description=agent_description,
        tools=toolbox,
    )
|