File size: 8,476 Bytes
b67af4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e061bc4
b67af4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e061bc4
b67af4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from typing import List, Optional, Dict
from smolagents import Tool

class HFLinkReportTool(Tool):
    """Generate a single-layout HTML report (cards + counters) from a final textual answer.

    The tool extracts http(s) links from the provided text, categorizes them
    (HF models/datasets/spaces/papers, blogs, repos, videos, news, other) and
    renders a consistent link report. Always returns a full HTML document
    (starts with <!DOCTYPE html>), including when report generation fails.
    """

    name = "hf_links_to_report"
    description = (
        "Create an HTML report from a final answer text. The tool parses links, groups them into categories "
        "(Hugging Face models/datasets/spaces/papers and external resources like blogs/repos/videos/news), and renders cards. "
        "Inputs: final_answer (string, required), query (string, optional), title (string, optional). Returns an HTML document."
    )
    inputs = {
        "final_answer": {"type": "string", "description": "Final answer text containing inline links"},
        "query": {"type": "string", "description": "Original user intent or topic", "nullable": True},
        "title": {"type": "string", "description": "Dashboard title", "nullable": True},
    }
    output_type = "string"

    def forward(self, final_answer: str, query: Optional[str] = None, title: Optional[str] = None) -> str:
        """Build the HTML link report for ``final_answer``.

        Args:
            final_answer: Text to scan for ``http://`` / ``https://`` links.
                ``None`` or an empty string produces a report with no cards.
            query: Accepted for interface compatibility; it is not rendered
                in the report.
            title: Optional heading. Used (HTML-escaped) for both the page
                ``<title>`` and the visible header; when falsy the page title
                is ``"Report"`` and no header is rendered.

        Returns:
            A complete HTML document. If anything goes wrong while building
            the report, a minimal HTML document carrying the (escaped) error
            message is returned instead of raising.
        """
        # NOTE(review): imports stay function-local, as in the original code.
        # smolagents appears to expect self-contained tool methods when a tool
        # is saved/shared - confirm before hoisting these to module level.
        import re
        from html import escape
        from urllib.parse import quote, urlsplit

        try:
            # First path segments on huggingface.co that name a resource kind.
            hf_kinds = ("datasets", "spaces", "papers", "models")
            # First path segments that are site sections, never a repo owner.
            hf_reserved = {
                "datasets", "spaces", "papers", "models", "blog", "learn",
                "docs", "organizations", "collections",
            }
            # Third path segment of an owner/repo sub-page (e.g. /tree/main).
            hf_repo_views = {"tree", "blob", "resolve", "raw", "discussions", "commit", "commits"}
            repo_domains = ("github.com",)
            video_domains = ("youtube.com", "youtu.be")
            blog_domains = ("arxiv.org", "medium.com", "towardsdatascience.com")
            news_domains = ("theverge.com", "techcrunch.com", "venturebeat.com", "wired.com", "mit.edu")
            # (category key, section heading, counter-chip label or None for "no chip").
            layout = [
                ("models", "Models", "Models"),
                ("datasets", "Datasets", "Datasets"),
                ("spaces", "Spaces", "Spaces"),
                ("papers", "Papers", "Papers"),
                ("blogs", "Blogs / Docs", "Blogs/Docs"),
                ("repos", "Repositories", "Repos"),
                ("videos", "Videos", "Videos"),
                ("news", "News", "News"),
                ("other", "Other", None),
            ]

            def split_url(url: str):
                """Return ``(lower-cased host, path)``; host is ``""`` if none can be found."""
                try:
                    parts = urlsplit(url)
                    host, path = parts.hostname or "", parts.path
                except ValueError:
                    # urlsplit rejects malformed authorities such as "http://[abc".
                    host, path = "", ""
                if not host:
                    remainder = re.sub(r"^https?://", "", url)
                    host = re.split(r"[/?#]", remainder, maxsplit=1)[0]
                return host.lower(), path

            def on_domain(host: str, domains) -> bool:
                """True when ``host`` equals one of ``domains`` or is a subdomain of it."""
                return any(host == d or host.endswith("." + d) for d in domains)

            def categorize(host: str, path: str) -> str:
                """Map a parsed URL to one of the category keys in ``layout``."""
                if host in ("huggingface.co", "www.huggingface.co"):
                    segments = [s for s in path.lower().split("/") if s]
                    first = segments[0] if segments else ""
                    # Explicit kinds win so /datasets/* is never read as owner/repo.
                    if first in hf_kinds and len(segments) >= 2:
                        return first
                    # owner/repo, optionally followed by a repo sub-page.
                    is_repo_url = len(segments) == 2 or (
                        len(segments) > 2 and segments[2] in hf_repo_views
                    )
                    if is_repo_url and first not in hf_reserved:
                        return "models"
                    return "blogs"
                if on_domain(host, ("huggingface.co",)):
                    # Other huggingface.co subdomains are not repo URLs.
                    return "blogs"
                if on_domain(host, repo_domains):
                    return "repos"
                if on_domain(host, video_domains):
                    return "videos"
                if on_domain(host, blog_domains):
                    return "blogs"
                if on_domain(host, news_domains):
                    return "news"
                return "other"

            def render_card(url: str, host: str, key: str) -> str:
                """Render one link card; every dynamic value is HTML-escaped."""
                safe_url = escape(url, quote=True)
                safe_host = escape(host, quote=True)
                favicon = escape(
                    "https://www.google.com/s2/favicons?sz=32&domain=" + quote(host, safe=""),
                    quote=True,
                )
                # The URL reaches copyLink() through a data attribute rather
                # than being spliced into a JavaScript string literal.
                return (
                    f'<div class="card" data-cat="{key}">'
                    f'<div class="card-title"><img class="fav" src="{favicon}" alt=""/> '
                    f'<a href="{safe_url}" target="_blank" rel="noopener">{safe_url}</a></div>'
                    f'<div class="card-subtitle">{safe_host}</div>'
                    '<div class="card-actions">'
                    f'<button data-url="{safe_url}" onclick="copyLink(this.dataset.url)">Copy</button>'
                    "</div>"
                    "</div>"
                )

            def render_section(heading: str, links, key: str) -> str:
                """Render a titled card grid for one category."""
                cards = "\n".join(render_card(url, host, key) for url, host in links)
                return f'<section data-key="{key}"><h2>{heading}</h2><div class="cards">{cards}</div></section>'

            # Extract URLs. Quotes/angle brackets end a match, closing ")" / "]"
            # end it too (markdown links), and trailing sentence punctuation is
            # dropped. dict.fromkeys de-duplicates while keeping first-seen order.
            matches = re.findall(r"https?://[^\s<>\")\]]+", final_answer or "")
            unique_urls = dict.fromkeys(m.rstrip(".,;:!?'*") for m in matches)

            cats: Dict[str, List] = {key: [] for key, _, _ in layout}
            for url in unique_urls:
                host, path = split_url(url)
                if not host:
                    # Nothing usable left after trimming (e.g. "https://...").
                    continue
                cats[categorize(host, path)].append((url, host))

            page_title = escape(str(title)) if title else "Report"
            header_html = (
                f'<div class="header"><div><div class="title">{page_title}</div></div></div>'
                if title
                else ""
            )
            chips_html = "\n".join(
                f'<div class="stat-chip">{label}: {len(cats[key])}</div>'
                for key, _, label in layout
                if label
            )
            sections_html = "\n    ".join(
                render_section(heading, cats[key], key)
                for key, heading, _ in layout
                if cats[key]
            )

            style_block = """  <style>
    :root { --bg:#0b0d12; --fg:#e6e9ef; --muted:#9aa4b2; --card:#121621; --accent:#5ac8fa; }
    body { background:var(--bg); color:var(--fg); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Inter, Arial, sans-serif; margin:0; padding:24px; }
    .container { max-width: 1200px; margin: 0 auto; }
    .header { display:flex; justify-content:space-between; align-items:center; gap:12px; margin-bottom: 12px; }
    .title { font-size: 22px; margin: 0; }
    .subtitle { color: var(--muted); }
    .stats { display:flex; gap:10px; flex-wrap:wrap; margin: 8px 0 18px; }
    .stat-chip { background: var(--card); border: 1px solid rgba(255,255,255,0.08); border-radius: 999px; padding: 6px 10px; font-size: 12px; color: var(--muted); }
    h2 { font-size: 16px; margin: 18px 0 8px; color: var(--accent); }
    .cards { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px,1fr)); gap: 12px; }
    .card { background: var(--card); border: 1px solid rgba(255,255,255,0.06); border-radius: 10px; padding: 12px; }
    .card-title { font-weight: 600; margin-bottom: 4px; overflow-wrap:anywhere; }
    .card-subtitle { color: var(--muted); font-size: 12px; }
    .answer { line-height:1.55; color:#d2d7df; }
    .card-actions button { background:#1f2937;color:#e5e7eb;border:1px solid rgba(255,255,255,0.08);border-radius:6px;padding:4px 8px;cursor:pointer;font-size:12px; }
    .fav { width:14px; height:14px; vertical-align:middle; margin-right:6px; border-radius:4px; }
    .warn { margin-left:6px; cursor: help; }
  </style>"""
            script_block = """  <script>
    function copyLink(url){ try{navigator.clipboard && navigator.clipboard.writeText(url);}catch(e){} }
  </script>"""

            return f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>{page_title}</title>
{style_block}
  <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/dompurify@3.1.6/dist/purify.min.js"></script>
</head>
<body>
  <div class="container">{header_html}
    <h2>You may be interested <span class="warn" title="Links may be AI‑generated and might not resolve.">⚠️</span></h2>
    <div class="stats">{chips_html}</div>
    {sections_html}
  </div>
{script_block}
</body>
</html>
"""
        except Exception as e:
            # Deliberate top-level boundary: the tool's contract is to always
            # hand back an HTML document, so failures are reported in-page.
            return f"<!DOCTYPE html><html><body><pre>Error generating report: {escape(str(e))}</pre></body></html>"