dot-txt / llm.py
Kelly Bucaille
Update llm.py
70a30ed
import os
import time
from smolagents import (
CodeAgent,
LiteLLMModel,
VisitWebpageTool,
GoogleSearchTool,
MCPClient,
OpenAIServerModel
)
# Firecrawl API key, read once at import time; None when the variable is unset.
FIRECRAWL_API_KEY = os.environ.get("FIRECRAWL_API_KEY")
def get_anthropic_model():
    """Return a ``LiteLLMModel`` configured for ``claude-opus-4-20250514``.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.
    ``os.getenv`` yields ``None`` when the variable is unset, and that value
    is handed to ``LiteLLMModel`` unchanged (no error is raised here).
    """
    anthropic_key = os.getenv("ANTHROPIC_API_KEY")
    return LiteLLMModel(
        model_id="claude-opus-4-20250514",
        api_key=anthropic_key,
    )
def get_openai_model():
    """Return an ``OpenAIServerModel`` for ``gpt-4o`` on the OpenAI API.

    Raises:
        KeyError: if ``OPENAI_API_KEY`` is not present in the environment
            (the key is read with ``os.environ[...]``, not ``os.getenv``).
    """
    openai_key = os.environ["OPENAI_API_KEY"]
    return OpenAIServerModel(
        model_id="gpt-4o",
        api_base="https://api.openai.com/v1",
        api_key=openai_key,
    )
def get_firecrawl_tools(max_retries=1, timeout=5):
    """Initialize Firecrawl tools with retry logic and timeout.

    Connects to the Firecrawl MCP endpoint (the URL embeds the module-level
    ``FIRECRAWL_API_KEY``) and returns the tools it exposes. This is a
    best-effort helper: any exception is swallowed, and after the final
    failed attempt a message is printed and an empty list is returned.

    Args:
        max_retries: Total number of connection attempts. Values below 1
            mean no attempt is made and an empty list is returned.
        timeout: Value forwarded as the ``timeout`` entry of the MCP server
            parameters.

    Returns:
        The tools reported by the MCP client, or ``[]`` when every attempt
        failed or no attempt was made. Never ``None``, so callers can
        always unpack the result with ``*``.
    """
    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            # NOTE(review): the client is never disconnected here; presumably
            # the returned tools need the connection kept open -- confirm.
            firecrawl_mcp_client = MCPClient(
                server_parameters={
                    "url": f"https://mcp.firecrawl.dev/{FIRECRAWL_API_KEY}/sse",
                    "timeout": timeout,
                }
            )
            return firecrawl_mcp_client.get_tools()
        except Exception as e:
            # Deliberately broad: the agent should still be usable without
            # Firecrawl, so failures degrade to "no tools" instead of raising.
            if attempt == final_attempt:
                print(f"Failed to initialize Firecrawl tools after {max_retries} attempts: {e}")
                return []
            time.sleep(1)  # Wait before retrying
    # Reached only when max_retries < 1 (the loop body never ran). Previously
    # this path fell through and returned None, which broke `*firecrawl_tools`.
    return []
def get_agent(model):
    """Build the ``LLMFullTextGenerator`` ``CodeAgent`` around *model*.

    The agent is given four groups of tools, in this order: a
    ``VisitWebpageTool``, a ``GoogleSearchTool``, the tools exposed by the
    sitemap-generator MCP server, and the tools from ``get_firecrawl_tools()``
    (which may be an empty list when Firecrawl could not be reached).

    Unlike the Firecrawl helper, the sitemap MCP connection is not guarded:
    an exception raised while connecting or listing tools propagates to the
    caller.

    Args:
        model: Model object handed to ``CodeAgent`` as ``model``.

    Returns:
        The configured ``CodeAgent`` instance.
    """
    # Prompt text passed to CodeAgent as ``description``. It is runtime text:
    # the content lines (and the closing quotes) are intentionally kept at
    # column 0 so no extra whitespace ends up in the string.
    description = """
You are a helpful assistant that can generate a full llm txt file from a website.
You can use the following tools to help you:
- VisitWebpageTool: to visit a website
- GoogleSearchTool: to search the web
- SitemapTools: to generate a sitemap for a website
- FirecrawlTools: to crawl a website, always use a timeout of 15000 milliseconds.
For FirecrawlTools, use the parameters of the following shape:
```
{
"name": "firecrawl_scrape",
"arguments": {
"url": "https://example.com",
"formats": ["markdown"],
"onlyMainContent": true,
"waitFor": 1000,
"timeout": 15000, // milliseconds
"mobile": false,
"includeTags": ["article", "main"],
"excludeTags": ["nav", "footer"],
"skipTlsVerification": false
}
}
```
You also have access to an tool to generate a sitemap for a website.
You can use the sitemap to have a better understanding of the website structure.
You will be given a website url and you will need to generate a full llm txt file from the website.
You can search for llm txt files on the web using the GoogleSearchTool with the following query: "site:docs.* llm-full.txt".
"""

    # Remote tool sources first (sitemap, then Firecrawl), matching the
    # original call order; the local tools are constructed afterwards.
    sitemap_client = MCPClient(
        server_parameters={
            "url": "https://a17o-sitemap-generator-mcp.hf.space/gradio_api/mcp/sse",
            "timeout": 30,
        }
    )
    from_sitemap = sitemap_client.get_tools()
    from_firecrawl = get_firecrawl_tools()

    toolbox = [VisitWebpageTool(), GoogleSearchTool()]
    toolbox.extend(from_sitemap)
    toolbox.extend(from_firecrawl)

    return CodeAgent(
        model=model,
        name="LLMFullTextGenerator",
        description=description,
        tools=toolbox,
    )