Spaces:
Running
Running
from typing import List, Optional, Dict | |
from smolagents import Tool | |
class HFLinkReportTool(Tool):
    """Generate a single-layout HTML report (cards + counters) from a final textual answer.

    The tool extracts ``http(s)`` links from the provided text, de-duplicates them,
    sorts them into categories (Hugging Face models/datasets/spaces/papers, blogs/docs,
    repos, videos, news, other) by host name and path, and renders one card per link.
    Everything taken from the input (title, URLs, host names) is HTML-escaped before
    it is written into the page. ``forward`` always returns a full HTML document
    (starts with ``<!DOCTYPE html>``), including when report generation fails.
    """

    name = "hf_links_to_report"
    description = (
        "Create an HTML report from a final answer text. The tool parses links, groups them into categories "
        "(Hugging Face models/datasets/spaces/papers and external resources like blogs/repos/videos/news), and renders cards. "
        "Inputs: final_answer (string, required), query (string, optional), title (string, optional). Returns an HTML document."
    )
    inputs = {
        "final_answer": {"type": "string", "description": "Final answer text containing inline links"},
        "query": {"type": "string", "description": "Original user intent or topic", "nullable": True},
        "title": {"type": "string", "description": "Dashboard title", "nullable": True},
    }
    output_type = "string"

    # NOTE: imports are kept function-local, as in the original implementation.
    # smolagents documents self-contained methods as a requirement for saving or
    # sharing a tool, so helper methods below import what they need themselves.

    def forward(self, final_answer: str, query: Optional[str] = None, title: Optional[str] = None) -> str:
        """Build the HTML link report for ``final_answer``.

        Args:
            final_answer: Text to scan for ``http://`` / ``https://`` links.
            query: Accepted for interface compatibility; it is not rendered.
            title: Optional heading shown at the top of the page; also used as
                the document ``<title>`` ("Report" when omitted).

        Returns:
            A complete HTML document. If anything goes wrong, a minimal HTML
            page containing the escaped error message is returned instead of
            raising.
        """
        import html as _html

        try:
            links = self._extract_urls(final_answer or "")
            return self._render(self._categorize(links), title)
        except Exception as e:
            # Tool boundary: the contract is "always return an HTML document",
            # so failures are reported in-page rather than propagated.
            message = _html.escape(str(e), quote=True)
            return f"<!DOCTYPE html><html><body><pre>Error generating report: {message}</pre></body></html>"

    def _extract_urls(self, text: str) -> List[str]:
        """Return the distinct http(s) URLs in ``text``, in order of first appearance."""
        import re

        # Stop at whitespace and at the delimiters that usually wrap a link in
        # prose, Markdown or HTML: ) ] < > " and the backtick.
        candidates = re.findall(r"https?://[^\s)\]<>\"`]+", text)
        seen = set()
        urls = []
        for candidate in candidates:
            # Sentence punctuation / emphasis markers glued to the end of a
            # link are not part of it.
            url = candidate.rstrip(".,;:!?*'")
            remainder = url.split("://", 1)[1]
            if not remainder or remainder[0] in "/?#":
                continue  # no host left, nothing to link to
            if url in seen:
                continue  # one card (and one count) per distinct link
            seen.add(url)
            urls.append(url)
        return urls

    def _split_url(self, url: str):
        """Split ``url`` into ``(netloc, hostname, path_segments)``.

        ``netloc`` is the authority exactly as written, ``hostname`` is the
        lower-cased host without user-info or port, and ``path_segments`` are
        the lower-cased non-empty path components (query and fragment removed).
        """
        import re

        remainder = re.sub(r"^https?://", "", url)
        netloc = re.split(r"[/?#]", remainder, maxsplit=1)[0]
        path = re.split(r"[?#]", remainder[len(netloc):], maxsplit=1)[0]
        segments = [part for part in path.lower().split("/") if part]
        hostname = netloc.rsplit("@", 1)[-1].split(":", 1)[0].lower()
        return netloc, hostname, segments

    def _categorize(self, urls: List[str]) -> Dict[str, List[str]]:
        """Group ``urls`` by category key, matching on host name and path segments."""
        # First path segment that names a Hugging Face resource kind directly.
        hf_kinds = ("datasets", "spaces", "papers", "models")
        # Site sections that must not be mistaken for an ``owner/repo`` model path.
        hf_sections = {"datasets", "spaces", "papers", "blog", "learn", "docs", "organizations", "collections"}
        # Checked in order; the first matching domain group wins.
        external = (
            ("repos", ("github.com",)),
            ("videos", ("youtube.com", "youtu.be")),
            ("blogs", ("arxiv.org", "medium.com", "towardsdatascience.com")),
            ("news", ("theverge.com", "techcrunch.com", "venturebeat.com", "wired.com", "mit.edu")),
        )

        def on_domain(hostname: str, domains) -> bool:
            # Exact host or a sub-domain of it; a domain name that merely
            # appears somewhere in the path or query string does not count.
            return any(hostname == d or hostname.endswith("." + d) for d in domains)

        cats: Dict[str, List[str]] = {
            key: []
            for key in ("models", "datasets", "spaces", "papers", "blogs", "repos", "videos", "news", "other")
        }
        for url in urls:
            _, hostname, segments = self._split_url(url)
            if on_domain(hostname, ("huggingface.co",)):
                first = segments[0] if segments else ""
                if first in hf_kinds and len(segments) >= 2:
                    key = first
                elif len(segments) >= 2 and first not in hf_sections:
                    # owner/repo, optionally followed by /tree/..., /blob/... or a trailing slash
                    key = "models"
                else:
                    # blog, docs, learn, listing pages, profiles, ...
                    key = "blogs"
            else:
                key = next((k for k, domains in external if on_domain(hostname, domains)), "other")
            cats[key].append(url)
        return cats

    def _render(self, cats: Dict[str, List[str]], title: Optional[str]) -> str:
        """Render the categorized links as a complete HTML document."""
        import html as _html
        from urllib.parse import quote

        def esc(value: str) -> str:
            return _html.escape(value, quote=True)

        def card(url: str, key: str) -> str:
            netloc, hostname, _ = self._split_url(url)
            favicon = "https://www.google.com/s2/favicons?sz=32&domain=" + quote(hostname, safe="")
            safe_url = esc(url)
            # The URL reaches the click handler through a data attribute, so it
            # is never spliced into JavaScript source.
            return (
                f'<div class="card" data-cat="{esc(key)}">'
                f'<div class="card-title"><img class="fav" src="{esc(favicon)}" alt=""/> '
                f'<a href="{safe_url}" target="_blank" rel="noopener">{safe_url}</a></div>'
                f'<div class="card-subtitle">{esc(netloc)}</div>'
                f'<div class="card-actions"><button type="button" data-url="{safe_url}" '
                f'onclick="copyLink(this.dataset.url)">Copy</button></div>'
                "</div>"
            )

        # (category key, counter label, section heading), in display order.
        layout = (
            ("models", "Models", "Models"),
            ("datasets", "Datasets", "Datasets"),
            ("spaces", "Spaces", "Spaces"),
            ("papers", "Papers", "Papers"),
            ("blogs", "Blogs/Docs", "Blogs / Docs"),
            ("repos", "Repos", "Repositories"),
            ("videos", "Videos", "Videos"),
            ("news", "News", "News"),
            ("other", "Other", "Other"),
        )

        chips = "\n".join(
            f'<div class="stat-chip">{esc(label)}: {len(cats[key])}</div>' for key, label, _ in layout
        )

        # Empty categories get a counter chip but no section.
        sections = []
        for key, _, heading in layout:
            if not cats[key]:
                continue
            cards = "\n".join(card(url, key) for url in cats[key])
            sections.append(
                f'<section data-key="{esc(key)}"><h2>{esc(heading)}</h2>'
                f'<div class="cards">{cards}</div></section>'
            )

        doc_title = esc(title or "Report")
        header_html = (
            f'<div class="header"><div><div class="title">{esc(title)}</div></div></div>' if title else ""
        )

        css_rules = (
            ":root { --bg:#0b0d12; --fg:#e6e9ef; --muted:#9aa4b2; --card:#121621; --accent:#5ac8fa; }",
            "body { background:var(--bg); color:var(--fg); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Inter, Arial, sans-serif; margin:0; padding:24px; }",
            ".container { max-width: 1200px; margin: 0 auto; }",
            ".header { display:flex; justify-content:space-between; align-items:center; gap:12px; margin-bottom: 12px; }",
            ".title { font-size: 22px; margin: 0; }",
            ".subtitle { color: var(--muted); }",
            ".stats { display:flex; gap:10px; flex-wrap:wrap; margin: 8px 0 18px; }",
            ".stat-chip { background: var(--card); border: 1px solid rgba(255,255,255,0.08); border-radius: 999px; padding: 6px 10px; font-size: 12px; color: var(--muted); }",
            "h2 { font-size: 16px; margin: 18px 0 8px; color: var(--accent); }",
            ".cards { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px,1fr)); gap: 12px; }",
            ".card { background: var(--card); border: 1px solid rgba(255,255,255,0.06); border-radius: 10px; padding: 12px; }",
            ".card-title { font-weight: 600; margin-bottom: 4px; overflow-wrap:anywhere; }",
            ".card-subtitle { color: var(--muted); font-size: 12px; }",
            ".answer { line-height:1.55; color:#d2d7df; }",
            ".card-actions button { background:#1f2937;color:#e5e7eb;border:1px solid rgba(255,255,255,0.08);border-radius:6px;padding:4px 8px;cursor:pointer;font-size:12px; }",
            ".fav { width:14px; height:14px; vertical-align:middle; margin-right:6px; border-radius:4px; }",
            ".warn { margin-left:6px; cursor: help; }",
        )

        # The only script the page needs is the inline copy helper.
        page = [
            "<!DOCTYPE html>",
            '<html lang="en">',
            "<head>",
            '<meta charset="utf-8" />',
            '<meta name="viewport" content="width=device-width, initial-scale=1" />',
            f"<title>{doc_title}</title>",
            "<style>",
            *css_rules,
            "</style>",
            "</head>",
            "<body>",
            f'<div class="container">{header_html}',
            '<h2>You may be interested <span class="warn" title="Links may be AI‑generated and might not resolve.">⚠️</span></h2>',
            f'<div class="stats">{chips}</div>',
            *sections,
            "</div>",
            "<script>",
            "function copyLink(url){ try{navigator.clipboard && navigator.clipboard.writeText(url);}catch(e){} }",
            "</script>",
            "</body>",
            "</html>",
            "",
        ]
        return "\n".join(page)