from typing import List, Optional, Dict
from smolagents import Tool
class HFLinkReportTool(Tool):
    """Generate a single-layout HTML report (cards + counters) from a final textual answer.

    The tool extracts links from the provided text, categorizes them
    (HF models/datasets/spaces/papers, blogs, repos, videos, news),
    and renders a consistent link report. Always returns a full HTML
    document.

    NOTE(review): the original docstring read "starts with )" — the token
    before the parenthesis was evidently stripped during extraction;
    presumably it was "<!DOCTYPE html>". TODO confirm against the renderer.
    """

    # Tool metadata consumed by the smolagents framework (tool registry name
    # and the description shown to the planning model).
    name = "hf_links_to_report"
    description = (
        "Create an HTML report from a final answer text. The tool parses links, groups them into categories "
        "(Hugging Face models/datasets/spaces/papers and external resources like blogs/repos/videos/news), and renders cards. "
        "Inputs: final_answer (string, required), query (string, optional), title (string, optional). Returns an HTML document."
    )
    # JSON-schema-like argument spec; `query` and `title` are optional ("nullable").
    inputs = {
        "final_answer": {"type": "string", "description": "Final answer text containing inline links"},
        "query": {"type": "string", "description": "Original user intent or topic", "nullable": True},
        "title": {"type": "string", "description": "Dashboard title", "nullable": True},
    }
    # The tool returns a single HTML string.
    output_type = "string"
def forward(self, final_answer: str, query: Optional[str] = None, title: Optional[str] = None) -> str:
try:
import re
import json as _json
doc_title = title or "Report"
query = (query or "").strip()
header_html = f"
{title}
" if title else ""
# Extract URLs
urls = re.findall(r"https?://[^\s)\]]+", final_answer or "")
# Categorize
cats = {
"models": [], "datasets": [], "spaces": [], "papers": [],
"blogs": [], "repos": [], "videos": [], "news": [], "other": []
}
for u in urls:
low = u.lower()
if "huggingface.co/" in low:
# Prefer explicit kinds first to avoid misclassifying /datasets/* as generic owner/repo
if "/datasets/" in low:
cats["datasets"].append(u)
elif "/spaces/" in low:
cats["spaces"].append(u)
elif "/papers/" in low:
cats["papers"].append(u)
elif "/models/" in low:
cats["models"].append(u)
else:
# Treat bare owner/repo as models only if it is NOT under known sections
# e.g., huggingface.co/owner/repo → model repo; huggingface.co/blog/... → blog
m = re.search(r"huggingface\.co/([^/]+)/([^/]+)$", low)
if m and m.group(1) not in {"datasets", "spaces", "papers", "blog", "learn", "docs", "organizations", "collections"}:
cats["models"].append(u)
else:
cats["blogs"].append(u)
elif "github.com" in low:
cats["repos"].append(u)
elif "youtube.com" in low or "youtu.be" in low:
cats["videos"].append(u)
elif any(d in low for d in ["arxiv.org", "medium.com", "towardsdatascience.com", "huggingface.co/blog", "huggingface.co/learn"]):
cats["blogs"].append(u)
elif any(d in low for d in ["theverge.com", "techcrunch.com", "venturebeat.com", "wired.com", "mit.edu"]):
cats["news"].append(u)
else:
cats["other"].append(u)
def chips_section() -> str:
    """Render the per-category counters as a column of HTML "chip" spans.

    Reads the enclosing scope's `cats` mapping (category name -> list of
    URLs) and returns one span per category, joined by newlines.

    NOTE(review): the original return statement contained an f-string whose
    HTML tags were stripped during extraction, leaving an unterminated
    string literal ('f"' followed by bare text). Reconstructed here as a
    simple chip span — confirm the exact markup/classes against the
    intended stylesheet.
    """
    chips = [
        ("Models", len(cats["models"])),
        ("Datasets", len(cats["datasets"])),
        ("Spaces", len(cats["spaces"])),
        ("Papers", len(cats["papers"])),
        ("Blogs/Docs", len(cats["blogs"])),
        ("Repos", len(cats["repos"])),
        ("Videos", len(cats["videos"])),
        ("News", len(cats["news"])),
    ]
    return "\n".join(
        f'<span class="chip">{name}: <b>{count}</b></span>'
        for name, count in chips
    )
def host_icon(host: str) -> str:
    """Return inline icon markup for *host*.

    Currently a stub: no per-host icon is emitted, so this always
    yields the empty string (callers interpolate it into card HTML).
    """
    # Placeholder — favicon rendering is handled separately via the
    # Google s2 favicon service in the card builder.
    return ""
def card_list(urls: List[str], data_cat: str) -> str:
items = []
for u in urls:
host = re.sub(r"^https?://", "", u).split("/")[0]
icon = host_icon(host)
favicon = f"https://www.google.com/s2/favicons?sz=32&domain={host}"
items.append(
f"