from typing import List, Optional, Dict

from smolagents import Tool


class HFLinkReportTool(Tool):
    """Generate a single-layout HTML report (cards + counters) from a final textual answer.

    The tool extracts links from the provided text, categorizes them
    (HF models/datasets/spaces/papers, blogs, repos, videos, news), and renders
    a consistent link report. Always returns a full HTML document
    (starts with ``<!DOCTYPE html>``), including on the error path.
    """

    name = "hf_links_to_report"
    description = (
        "Create an HTML report from a final answer text. The tool parses links, groups them into categories "
        "(Hugging Face models/datasets/spaces/papers and external resources like blogs/repos/videos/news), and renders cards. "
        "Inputs: final_answer (string, required), query (string, optional), title (string, optional). Returns an HTML document."
    )
    inputs = {
        "final_answer": {"type": "string", "description": "Final answer text containing inline links"},
        "query": {"type": "string", "description": "Original user intent or topic", "nullable": True},
        "title": {"type": "string", "description": "Dashboard title", "nullable": True},
    }
    output_type = "string"

    def forward(self, final_answer: str, query: Optional[str] = None, title: Optional[str] = None) -> str:
        """Build the HTML link report for ``final_answer``.

        Args:
            final_answer: Free text that may contain ``http(s)://`` links.
            query: Accepted for interface compatibility; it is not rendered
                in the report.
            title: Optional heading. When omitted the page ``<title>`` falls
                back to "Report" and no heading is rendered.

        Returns:
            A complete HTML document. If report generation fails, a complete
            HTML document carrying the error message is returned instead of
            raising.
        """
        # Imports stay local to the method, as the original did for `re`.
        import re
        from html import escape
        from urllib.parse import quote

        try:
            doc_title = title or "Report"
            # Every interpolated value is escaped: the answer text is untrusted.
            header_html = f"<h1>{escape(str(title))}</h1>" if title else ""

            # Extract URLs, drop trailing sentence punctuation that the
            # pattern would otherwise swallow, and de-duplicate in order so
            # counters and cards reflect distinct links.
            raw_urls = re.findall(r"https?://[^\s)\]]+", final_answer or "")
            trimmed = (u.rstrip(".,;:!?\"'>") for u in raw_urls)
            urls = list(
                dict.fromkeys(u for u in trimmed if re.sub(r"^https?://", "", u))
            )

            cats: Dict[str, List[str]] = {
                "models": [], "datasets": [], "spaces": [], "papers": [],
                "blogs": [], "repos": [], "videos": [], "news": [], "other": [],
            }

            # First path segments on huggingface.co that are site sections,
            # not repository owners.
            hf_sections = {
                "datasets", "spaces", "papers", "blog", "learn", "docs",
                "organizations", "collections",
            }
            # Explicit Hugging Face kinds are checked before the bare
            # owner/repo fallback so /datasets/* is never taken for a model.
            hf_markers = (
                ("/datasets/", "datasets"),
                ("/spaces/", "spaces"),
                ("/papers/", "papers"),
                ("/models/", "models"),
            )
            # Evaluated in order; the first rule with a matching substring wins.
            # huggingface.co/blog and /learn need no entry here: any such URL
            # is already handled by the Hugging Face branch.
            external_rules = (
                ("repos", ("github.com",)),
                ("videos", ("youtube.com", "youtu.be")),
                ("blogs", ("arxiv.org", "medium.com", "towardsdatascience.com")),
                ("news", ("theverge.com", "techcrunch.com", "venturebeat.com", "wired.com", "mit.edu")),
            )

            def categorize(url: str) -> str:
                """Return the category key for a single URL."""
                low = url.lower()
                if "huggingface.co/" in low:
                    for marker, key in hf_markers:
                        if marker in low:
                            return key
                    # huggingface.co/owner/repo (optionally with a trailing
                    # slash) is a model repo unless `owner` is a site section.
                    m = re.search(r"huggingface\.co/([^/]+)/([^/]+)/?$", low)
                    if m and m.group(1) not in hf_sections:
                        return "models"
                    return "blogs"
                for key, domains in external_rules:
                    if any(d in low for d in domains):
                        return key
                return "other"

            for u in urls:
                cats[categorize(u)].append(u)

            def chips_section() -> str:
                """Render one counter chip per primary category."""
                chips = [
                    ("Models", len(cats["models"])),
                    ("Datasets", len(cats["datasets"])),
                    ("Spaces", len(cats["spaces"])),
                    ("Papers", len(cats["papers"])),
                    ("Blogs/Docs", len(cats["blogs"])),
                    ("Repos", len(cats["repos"])),
                    ("Videos", len(cats["videos"])),
                    ("News", len(cats["news"])),
                ]
                return "\n".join(
                    f'<span class="chip">{escape(label)}: {count}</span>'
                    for label, count in chips
                )

            def host_icon(host: str) -> str:
                """Return inline icon markup for a host (currently none)."""
                return ""

            def card_list(links: List[str], data_cat: str) -> str:
                """Render one card per URL, tagged with its category key."""
                items = []
                for u in links:
                    # Cut at the first '/', '?' or '#' so a query string on a
                    # path-less URL does not leak into the host.
                    remainder = re.sub(r"^https?://", "", u)
                    host = re.split(r"[/?#]", remainder, maxsplit=1)[0]
                    icon = host_icon(host)
                    favicon = (
                        "https://www.google.com/s2/favicons?sz=32&domain="
                        + quote(host, safe="")
                    )
                    safe_url = escape(u, quote=True)
                    items.append(
                        f'<div class="card" data-cat="{escape(data_cat, quote=True)}">'
                        f'<a href="{safe_url}" target="_blank" rel="noopener noreferrer">'
                        f'{icon}<img class="favicon" src="{escape(favicon, quote=True)}" alt=""/> '
                        f"{safe_url}</a>"
                        f'<div class="host">{escape(host)}</div>'
                        "</div>"
                    )
                return "\n".join(items)

            def section(title_text: str, links: List[str], key: str) -> str:
                """Render a titled card grid, or nothing when there are no links."""
                if not links:
                    return ""
                return (
                    f'<section class="section" data-cat="{escape(key, quote=True)}">\n'
                    f"<h3>{escape(title_text)}</h3>\n"
                    f'<div class="grid">\n{card_list(links, key)}\n</div>\n'
                    "</section>"
                )

            # Minimal presentational defaults; class names allow restyling.
            css = (
                "body{font-family:sans-serif;margin:24px}"
                ".chips{display:flex;flex-wrap:wrap;gap:8px;margin:12px 0}"
                ".chip{border:1px solid #ccc;border-radius:12px;padding:2px 10px}"
                ".grid{display:grid;gap:12px;"
                "grid-template-columns:repeat(auto-fill,minmax(280px,1fr))}"
                ".card{border:1px solid #ddd;border-radius:8px;padding:10px;"
                "word-break:break-all}"
                ".host{color:#666;font-size:12px;margin-top:4px}"
                ".favicon{width:16px;height:16px;vertical-align:middle}"
            )

            sections = [
                section("Models", cats["models"], "models"),
                section("Datasets", cats["datasets"], "datasets"),
                section("Spaces", cats["spaces"], "spaces"),
                section("Papers", cats["papers"], "papers"),
                section("Blogs / Docs", cats["blogs"], "blogs"),
                section("Repositories", cats["repos"], "repos"),
                section("Videos", cats["videos"], "videos"),
                section("News", cats["news"], "news"),
                section("Other", cats["other"], "other"),
            ]
            body_parts = [
                header_html,
                "<h2>You may be interested ⚠️</h2>",
                f'<div class="chips">\n{chips_section()}\n</div>',
            ]
            body_parts.extend(sections)
            body_html = "\n".join(part for part in body_parts if part)

            return (
                "<!DOCTYPE html>\n"
                '<html lang="en">\n'
                "<head>\n"
                '<meta charset="utf-8"/>\n'
                f"<title>{escape(str(doc_title))}</title>\n"
                f"<style>{css}</style>\n"
                "</head>\n"
                "<body>\n"
                f"{body_html}\n"
                "</body>\n"
                "</html>"
            )
        except Exception as e:
            # Tool boundary: report the failure as a document rather than
            # raising, so callers always receive HTML.
            return (
                "<!DOCTYPE html>\n"
                '<html lang="en">\n'
                "<head>\n"
                '<meta charset="utf-8"/>\n'
                "<title>Report error</title>\n"
                "</head>\n"
                "<body>\n"
                f'<p class="error">Error generating report: {escape(str(e))}</p>\n'
                "</body>\n"
                "</html>"
            )