"""Anonymous (public-only) Hugging Face Hub tools for a smolagents agent.

Every tool returns a string: JSON for the lookup/search tools and HTML for the
two report tools. No Authorization header is ever sent.
"""

import html
import json
import time
from typing import Dict, List, Optional, Tuple, Union
from urllib.parse import quote

import requests
from smolagents import Tool

# -----------------------------
# HTTP helpers (anonymous only)
# -----------------------------

DEFAULT_TIMEOUT = 15
RETRY_STATUS = {429, 500, 502, 503, 504}

_SITE_ROOT = "https://huggingface.co"
_API_ROOT = f"{_SITE_ROOT}/api"


def _anonymous_headers() -> Dict[str, str]:
    """Return the request headers used for every call (never includes auth)."""
    return {
        "Accept": "application/json",
        "User-Agent": "HuggingResearch-Agent/1.0 (+https://huggingface.co)",
        # No Authorization header on purpose (public only)
    }


def _http_get_json(
    url: str,
    params: Optional[Dict] = None,
    max_retries: int = 2,
) -> Tuple[Optional[Union[Dict, List]], int, str]:
    """GET ``url`` and decode the body as JSON, retrying transient failures.

    Args:
        url: Absolute URL to fetch.
        params: Optional query-string parameters.
        max_retries: Extra attempts after the first one for statuses in
            ``RETRY_STATUS`` and for raised exceptions.

    Returns:
        ``(data, status, error)``. ``data`` is the decoded JSON on HTTP 200,
        otherwise ``None``. ``status`` is the HTTP status code, or ``0`` when
        no response was obtained. ``error`` is ``""`` on success, else one of
        ``"invalid_json: ..."``, ``"no_access"``, ``"http_<code>"`` or
        ``"exception: ..."``.
    """
    last_err = ""
    for attempt in range(max_retries + 1):
        try:
            resp = requests.get(
                url,
                params=params or {},
                headers=_anonymous_headers(),
                timeout=DEFAULT_TIMEOUT,
            )
            if resp.status_code == 200:
                try:
                    return resp.json(), resp.status_code, ""
                except ValueError as je:
                    return None, resp.status_code, f"invalid_json: {je}"
            if resp.status_code in {401, 403}:
                # Private/Gated/Unauthorized
                return None, resp.status_code, "no_access"
            if resp.status_code in RETRY_STATUS and attempt < max_retries:
                time.sleep(1.0 * (attempt + 1))  # linear back-off
                continue
            return None, resp.status_code, f"http_{resp.status_code}"
        except Exception as e:  # tool boundary: report the failure, never raise
            last_err = str(e)
            if attempt < max_retries:
                time.sleep(0.8 * (attempt + 1))
                continue
    return None, 0, f"exception: {last_err}"


def _quote_path(value) -> str:
    """Percent-encode ``value`` for use as a URL path, keeping ``/`` separators."""
    return quote(str(value or ""), safe="/")


def _coerce_limit(limit, default: int) -> int:
    """Return ``limit`` as a positive int, or ``default`` when it is unusable."""
    if not limit:
        return default
    try:
        value = int(limit)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric strings, NaN and infinities end up here.
        return default
    return value if value > 0 else default


def _split_csv(value) -> List[str]:
    """Split a comma-separated string into stripped, non-empty parts."""
    if not isinstance(value, str):
        return []
    return [part.strip() for part in value.split(",") if part.strip()]


# -----------------------------
# Normalization helpers
# -----------------------------

# Per-type settings: API collection name, legacy id key, public URL prefix and
# whether the normalized record carries a "task" field.
_REPO_KINDS = {
    "model": {"endpoint": "models", "alt_id": "modelId", "url_prefix": "", "has_task": True},
    "dataset": {"endpoint": "datasets", "alt_id": "datasetId", "url_prefix": "datasets/", "has_task": False},
    "space": {"endpoint": "spaces", "alt_id": "spaceId", "url_prefix": "spaces/", "has_task": False},
}


def _visibility_from_item(item: Dict) -> Tuple[str, str]:
    """Classify an API item as ``(visibility, access)``.

    Returns ``("private", "no_access")``, ``("gated", "no_access")`` or
    ``("public", "accessible")``. Non-dict input is treated as public.
    """
    if not isinstance(item, dict):
        return "public", "accessible"
    if item.get("private") is True:
        return "private", "no_access"
    card = item.get("cardData")
    # ``gated`` is tested for truthiness, not ``is True``, so that string
    # values such as "auto"/"manual" are recognised as gated.
    if item.get("gated") or item.get("gatedReason") or (isinstance(card, dict) and card.get("gated")):
        return "gated", "no_access"
    return "public", "accessible"


def _norm_common(item_id: str, item_type: str, owner: str, description: str = "", url_suffix: str = "") -> Dict:
    """Build the fields shared by every normalized record.

    ``url_suffix`` is placed between the site root and ``item_id`` (for
    example ``"datasets/"``); when empty the URL is ``<site>/<item_id>``.
    """
    if url_suffix:
        url = f"https://huggingface.co/{url_suffix}{item_id}"
    else:
        url = f"https://huggingface.co/{item_id}"
    return {
        "type": item_type,
        "id": item_id,
        "owner": owner,
        "url": url,
        "description": description or "",
    }


def _safe_get(item: Dict, key: str, default=None):
    """``dict.get`` that returns ``default`` when ``item`` is not a dict."""
    return item.get(key, default) if isinstance(item, dict) else default


def _normalize_repo(item, item_type: str, fallback_id: str = "", detailed: bool = False) -> Optional[Dict]:
    """Convert one raw model/dataset/space API item into a normalized record.

    Args:
        item: Raw API item (expected to be a dict).
        item_type: ``"model"``, ``"dataset"`` or ``"space"``.
        fallback_id: Id to use when the item carries none (info endpoints).
        detailed: When true, take the description from ``cardData`` and
            include ``cardData`` and ``siblings`` in the record.

    Returns:
        The normalized dict, or ``None`` when no usable id is available.
    """
    kind = _REPO_KINDS[item_type]
    repo_id = _safe_get(item, "id") or _safe_get(item, kind["alt_id"]) or fallback_id
    if not repo_id or not isinstance(repo_id, str):
        return None
    owner_name = repo_id.split("/")[0] if "/" in repo_id else ""

    card = _safe_get(item, "cardData")
    description = ""
    if detailed and isinstance(card, dict):
        # Some cards put a short summary in cardData.
        description = card.get("summary") or card.get("description") or ""

    visibility, access = _visibility_from_item(item)
    norm = _norm_common(repo_id, item_type, owner_name, description, kind["url_prefix"])
    norm["tags"] = _safe_get(item, "tags", [])
    if kind["has_task"]:
        norm["task"] = _safe_get(item, "pipeline_tag")
    norm.update({
        "likes": _safe_get(item, "likes", 0),
        "downloads": _safe_get(item, "downloads", 0),
        "updatedAt": _safe_get(item, "lastModified"),
        "visibility": visibility,
        "access": access,
    })
    if detailed:
        norm["cardData"] = card
        norm["siblings"] = _safe_get(item, "siblings")
    return norm


def _normalize_repo_list(data, item_type: str, limit: Optional[int] = None) -> List[Dict]:
    """Normalize a list response, skipping items without an id."""
    if not isinstance(data, list):
        return []
    raw_items = data[:limit] if limit else data
    normalized = (_normalize_repo(it, item_type) for it in raw_items)
    return [norm for norm in normalized if norm is not None]


def _build_search_params(
    query: Optional[str],
    owner: Optional[str],
    tags: Optional[str],
    sort: Optional[str],
    direction: Optional[str],
    limit,
    allowed_sorts: Tuple[str, ...],
    trending_sort: str,
    task: Optional[str] = None,
) -> Dict:
    """Build conservative query parameters for a Hub list endpoint.

    ``sort="trending"`` is an alias for ``trending_sort`` in descending order.
    ``direction`` is only applied together with a recognised ``sort``.
    """
    params: Dict = {}
    if query:
        params["search"] = query
    if owner:
        params["author"] = owner

    # pipeline_tag must be a single value; if multiple provided, take the first
    task_parts = task.split(",") if isinstance(task, str) else []
    first_task = task_parts[0].strip() if task_parts else ""
    if first_task:
        params["pipeline_tag"] = first_task

    tag_list = _split_csv(tags)
    if len(tag_list) == 1:
        params["tags"] = tag_list[0]
    elif tag_list:
        params["tags"] = tag_list  # requests will repeat the param

    # NOTE(review): sort/direction values are forwarded verbatim; confirm the
    # Hub API accepts "modified" and "descending"/"ascending". A 400 response
    # is covered by the minimal-params retry in _run_repo_search.
    if sort == "trending":
        params["sort"] = trending_sort
        params["direction"] = "descending"
    elif isinstance(sort, str) and sort in allowed_sorts:
        params["sort"] = sort
        if direction in ("descending", "ascending"):
            params["direction"] = direction

    params["limit"] = _coerce_limit(limit, 10)
    return params


def _run_repo_search(item_type: str, params: Dict, query: Optional[str], limit) -> str:
    """Query a list endpoint and return the tool's JSON payload.

    On HTTP 400 the request is retried once with only ``search``/``limit``.
    The echoed ``params`` are always the ones originally built.
    """
    url = f"{_API_ROOT}/{_REPO_KINDS[item_type]['endpoint']}"
    data, status, err = _http_get_json(url, params)
    if status == 400:
        minimal: Dict = {"search": query} if query else {}
        if limit:
            minimal["limit"] = _coerce_limit(limit, 10)
        data, status, err = _http_get_json(url, minimal)
    return json.dumps(
        {
            "results": _normalize_repo_list(data, item_type),
            "status": status,
            "error": err,
            "params": params,
        },
        ensure_ascii=False,
    )


def _run_repo_info(item_type: str, repo_id: str) -> str:
    """Fetch one repo with ``full=true`` and return the normalized JSON payload."""
    url = f"{_API_ROOT}/{_REPO_KINDS[item_type]['endpoint']}/{_quote_path(repo_id)}"
    data, status, err = _http_get_json(url, {"full": "true"})
    item: Dict = {}
    if isinstance(data, dict):
        item = _normalize_repo(data, item_type, fallback_id=repo_id, detailed=True) or {}
    return json.dumps({"item": item, "status": status, "error": err}, ensure_ascii=False)


# -----------------------------
# HTML rendering helpers
# -----------------------------
# NOTE(review): the markup and class names below are a minimal structure;
# align them with the application's stylesheet if it expects other names.


def _esc(value) -> str:
    """HTML-escape ``value`` (``None`` becomes an empty string)."""
    return "" if value is None else html.escape(str(value), quote=True)


def _as_item_list(value) -> List[Dict]:
    """Return the dict entries of ``value`` when it is a list, else ``[]``."""
    if not isinstance(value, list):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def _render_card(item: Dict, max_desc: int = 220) -> str:
    """Render one normalized item as an HTML card with all values escaped."""
    visibility = item.get("visibility")
    badge = ""
    if visibility in ("private", "gated") or item.get("access") == "no_access":
        badge = f"<span class='badge'>{_esc(visibility or 'restricted')}</span>"

    meta = []
    if item.get("task"):
        meta.append(f"Task: {item['task']}")
    tags = item.get("tags")
    if isinstance(tags, (list, tuple)) and tags:
        meta.append("Tags: " + ", ".join(str(tag) for tag in tags[:5]))
    elif isinstance(tags, str) and tags:
        meta.append(f"Tags: {tags}")
    if item.get("downloads") is not None:
        meta.append(f"⬇️ {item['downloads']}")
    if item.get("likes") is not None:
        meta.append(f"❤️ {item['likes']}")
    if item.get("updatedAt"):
        meta.append(f"Updated: {item['updatedAt']}")

    desc = str(item.get("description") or "").strip()
    if len(desc) > max_desc:
        desc = desc[: max_desc - 3] + "..."

    # Items without an id (e.g. paper entries) fall back to their title.
    heading = item.get("id") or item.get("title") or ""
    return (
        "<div class='card'>"
        f"<div class='card-title'>{_esc(heading)} {badge}</div>"
        f"<div class='card-subtitle'>{_esc(item.get('type', ''))} • {_esc(item.get('owner', ''))}</div>"
        f"<div class='card-desc'>{_esc(desc)}</div>"
        f"<div class='card-meta'>{_esc(' | '.join(meta))}</div>"
        "</div>"
    )


def _render_section(title_text: str, items: List[Dict], max_desc: int = 220) -> str:
    """Render a titled group of cards; returns ``""`` when there are none."""
    cards = "\n".join(_render_card(it, max_desc) for it in items if isinstance(it, dict))
    if not cards:
        return ""
    return (
        "<section>\n"
        f"<h2>{_esc(title_text)}</h2>\n"
        f"<div class='cards'>\n{cards}\n</div>\n"
        "</section>"
    )


def _render_page(title: str, body: str) -> str:
    """Wrap ``body`` (already-safe HTML) in a complete HTML document."""
    return (
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n"
        "<meta charset='utf-8'>\n"
        f"<title>{_esc(title)}</title>\n"
        "</head>\n"
        f"<body>\n{body}\n</body>\n"
        "</html>\n"
    )


# -----------------------------
# Tools
# -----------------------------


class HFModelsSearchTool(Tool):
    """Search public models through ``/api/models``."""

    name = "hf_models_search"
    description = (
        "Search public Hugging Face models. Provide a free-text query and optional filters "
        "(owner, single pipeline_tag, tags CSV, sort/direction, limit). "
        "Prefer minimal params; add owner/task/tags/sort only when the user implies them. "
        "Defaults: limit=10, sort omitted, direction omitted. Returns JSON with `results`, `status`, `error`, and `params`."
    )
    inputs = {
        "query": {"type": "string", "description": "Free-text search", "nullable": True},
        "owner": {"type": "string", "description": "Filter by owner/namespace", "nullable": True},
        "task": {"type": "string", "description": "Primary pipeline tag, e.g. text-classification", "nullable": True},
        "tags": {"type": "string", "description": "Comma-separated tags filter", "nullable": True},
        "sort": {"type": "string", "description": "downloads|likes|modified", "nullable": True},
        "direction": {"type": "string", "description": "descending|ascending", "nullable": True},
        "limit": {"type": "number", "description": "Max results", "nullable": True},
    }
    output_type = "string"

    def forward(
        self,
        query: Optional[str] = None,
        owner: Optional[str] = None,
        task: Optional[str] = None,
        tags: Optional[str] = None,
        sort: Optional[str] = None,
        direction: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> str:
        """Return a JSON string with normalized model results."""
        params = _build_search_params(
            query, owner, tags, sort, direction, limit,
            allowed_sorts=("downloads", "likes", "modified"),
            trending_sort="downloads",
            task=task,
        )
        return _run_repo_search("model", params, query, limit)


class HFModelInfoTool(Tool):
    """Fetch one model's metadata through ``/api/models/{repo_id}``."""

    name = "hf_model_info"
    description = (
        "Get detailed public model info by repo id (owner/name). Use this after a model search to fetch richer metadata (cardData, siblings, tags)."
    )
    inputs = {
        "repo_id": {"type": "string", "description": "Model repo id, e.g. bigscience/bloom"}
    }
    output_type = "string"

    def forward(self, repo_id: str) -> str:
        """Return a JSON string with ``item``, ``status`` and ``error``."""
        return _run_repo_info("model", repo_id)


class HFDatasetsSearchTool(Tool):
    """Search public datasets through ``/api/datasets``."""

    name = "hf_datasets_search"
    description = (
        "Search public datasets with a free-text query and optional filters (owner, tags CSV, sort/direction, limit). "
        "Prefer minimal params; add filters when implied. Defaults: limit=10. Returns JSON with `results`, `status`, `error`, and `params`."
    )
    inputs = {
        "query": {"type": "string", "description": "Free-text search", "nullable": True},
        "owner": {"type": "string", "description": "Filter by owner/namespace", "nullable": True},
        "tags": {"type": "string", "description": "Comma-separated tags filter", "nullable": True},
        "sort": {"type": "string", "description": "downloads|likes|modified", "nullable": True},
        "direction": {"type": "string", "description": "descending|ascending", "nullable": True},
        "limit": {"type": "number", "description": "Max results", "nullable": True},
    }
    output_type = "string"

    def forward(
        self,
        query: Optional[str] = None,
        owner: Optional[str] = None,
        tags: Optional[str] = None,
        sort: Optional[str] = None,
        direction: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> str:
        """Return a JSON string with normalized dataset results."""
        params = _build_search_params(
            query, owner, tags, sort, direction, limit,
            allowed_sorts=("downloads", "likes", "modified"),
            trending_sort="downloads",
        )
        return _run_repo_search("dataset", params, query, limit)


class HFDatasetInfoTool(Tool):
    """Fetch one dataset's metadata through ``/api/datasets/{repo_id}``."""

    name = "hf_dataset_info"
    description = (
        "Get detailed public dataset info by repo id (owner/name). Use after a dataset search to retrieve cardData and siblings."
    )
    inputs = {"repo_id": {"type": "string", "description": "Dataset repo id, e.g. glue"}}
    output_type = "string"

    def forward(self, repo_id: str) -> str:
        """Return a JSON string with ``item``, ``status`` and ``error``."""
        return _run_repo_info("dataset", repo_id)


class HFSpacesSearchTool(Tool):
    """Search public Spaces through ``/api/spaces``."""

    name = "hf_spaces_search"
    description = (
        "Search public Spaces with query and optional filters (owner, tags CSV, sort/direction, limit). "
        "Good for tutorials/demos related to a topic. Defaults: limit=10. Returns JSON with `results`, `status`, `error`, and `params`."
    )
    inputs = {
        "query": {"type": "string", "description": "Free-text search", "nullable": True},
        "owner": {"type": "string", "description": "Filter by owner/namespace", "nullable": True},
        "tags": {"type": "string", "description": "Comma-separated tags filter", "nullable": True},
        "sort": {"type": "string", "description": "likes|modified", "nullable": True},
        "direction": {"type": "string", "description": "descending|ascending", "nullable": True},
        "limit": {"type": "number", "description": "Max results", "nullable": True},
    }
    output_type = "string"

    def forward(
        self,
        query: Optional[str] = None,
        owner: Optional[str] = None,
        tags: Optional[str] = None,
        sort: Optional[str] = None,
        direction: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> str:
        """Return a JSON string with normalized Space results."""
        params = _build_search_params(
            query, owner, tags, sort, direction, limit,
            allowed_sorts=("likes", "modified"),
            trending_sort="likes",
        )
        return _run_repo_search("space", params, query, limit)


class HFSpaceInfoTool(Tool):
    """Fetch one Space's metadata through ``/api/spaces/{repo_id}``."""

    name = "hf_space_info"
    description = (
        "Get detailed Space info by repo id (owner/name). Use to inspect tags, likes, and card details after a Space search."
    )
    inputs = {"repo_id": {"type": "string", "description": "Space repo id, e.g. user/space-name"}}
    output_type = "string"

    def forward(self, repo_id: str) -> str:
        """Return a JSON string with ``item``, ``status`` and ``error``."""
        return _run_repo_info("space", repo_id)


class HFUserInfoTool(Tool):
    """Fetch a user/org profile through ``/api/users/{username}``."""

    name = "hf_user_info"
    description = (
        "Fetch public user/org profile by username. Helpful to scope searches by owner or explore maintainers."
    )
    inputs = {"username": {"type": "string", "description": "User or organization name"}}
    output_type = "string"

    def forward(self, username: str) -> str:
        """Return the raw profile plus ``visibility``/``access`` as JSON."""
        data, status, err = _http_get_json(f"{_API_ROOT}/users/{_quote_path(username)}")
        payload = {
            "item": data if isinstance(data, dict) else {},
            "status": status,
            "error": err,
            "visibility": "public",
            "access": "accessible" if status == 200 else "no_access",
        }
        return json.dumps(payload, ensure_ascii=False)


class HFCollectionsListTool(Tool):
    """List collections through ``/api/collections``."""

    name = "hf_collections_list"
    description = (
        "List public collections, optionally filtered by owner/namespace. Use to surface curated sets of repos. "
        "Owner may be an object; URL is normalized to https://huggingface.co/collections/{owner_name}/{slug}."
    )
    inputs = {"owner": {"type": "string", "description": "Filter by collection owner/namespace", "nullable": True}}
    output_type = "string"

    def forward(self, owner: Optional[str] = None) -> str:
        """Return minimally normalized collections as a JSON string."""
        params = {"owner": owner} if owner else {}
        data, status, err = _http_get_json(f"{_API_ROOT}/collections", params)
        raw_items = data if isinstance(data, list) else []

        items: List[Dict] = []
        for it in raw_items:
            cid = _safe_get(it, "id") or _safe_get(it, "slug") or ""
            ns_val = _safe_get(it, "owner") or _safe_get(it, "namespace") or ""
            if isinstance(ns_val, dict):
                ns = ns_val.get("name") or ns_val.get("fullname") or ""
            else:
                ns = ns_val
            url = ""
            if ns and cid:
                # Some APIs return id as "{namespace}/{slug}", so extract slug part only
                slug = str(cid).split("/")[-1]
                url = f"https://huggingface.co/collections/{ns}/{slug}"
            items.append({
                "type": "collection",
                "id": cid,
                "owner": ns,
                "title": _safe_get(it, "title", ""),
                "url": url,
                "visibility": "public",
                "access": "accessible",
            })
        return json.dumps({"results": items, "status": status, "error": err}, ensure_ascii=False)


class HFCollectionGetTool(Tool):
    """Fetch one collection through ``/api/collections/{namespace}/{slug_id}``."""

    name = "hf_collection_get"
    description = (
        "Get collection details by namespace and slug id (as in URL). Use after listing to inspect items."
    )
    inputs = {
        "namespace": {"type": "string", "description": "Collection owner/namespace"},
        "slug_id": {"type": "string", "description": "slug-id part as shown in URL"},
    }
    output_type = "string"

    def forward(self, namespace: str, slug_id: str) -> str:
        """Return the raw collection object as a JSON string."""
        url = f"{_API_ROOT}/collections/{_quote_path(namespace)}/{_quote_path(slug_id)}"
        data, status, err = _http_get_json(url)
        item = data if isinstance(data, dict) else {}
        return json.dumps({"item": item, "status": status, "error": err}, ensure_ascii=False)


class HFPaperInfoTool(Tool):
    """Fetch paper metadata through ``/api/papers/{arxiv_id}``."""

    name = "hf_paper_info"
    description = (
        "Fetch paper metadata by arXiv id (e.g., 1706.03762). Combine with hf_paper_repos to find related repos."
    )
    inputs = {"arxiv_id": {"type": "string", "description": "arXiv identifier, e.g. 1706.03762"}}
    output_type = "string"

    def forward(self, arxiv_id: str) -> str:
        """Return the raw paper object as a JSON string."""
        data, status, err = _http_get_json(f"{_API_ROOT}/papers/{_quote_path(arxiv_id)}")
        item = data if isinstance(data, dict) else {}
        return json.dumps({"item": item, "status": status, "error": err}, ensure_ascii=False)


class HFPaperReposTool(Tool):
    """List repos citing a paper through ``/api/arxiv/{arxiv_id}/repos``."""

    name = "hf_paper_repos"
    description = (
        "List repos (models/datasets/spaces) referencing an arXiv id. Use alongside hf_paper_info to map research → repos."
    )
    inputs = {"arxiv_id": {"type": "string", "description": "arXiv identifier, e.g. 1706.03762"}}
    output_type = "string"

    def forward(self, arxiv_id: str) -> str:
        """Return the raw repo list as a JSON string."""
        data, status, err = _http_get_json(f"{_API_ROOT}/arxiv/{_quote_path(arxiv_id)}/repos")
        results = data if isinstance(data, list) else []
        return json.dumps({"results": results, "status": status, "error": err}, ensure_ascii=False)


class HFDailyPapersTool(Tool):
    """Fetch the daily papers list through ``/api/daily_papers``."""

    name = "hf_daily_papers"
    description = (
        "Get the daily curated papers list from Hugging Face. Useful for current research trends."
    )
    inputs = {}
    output_type = "string"

    def forward(self) -> str:  # type: ignore[override]
        """Return the raw daily-papers list as a JSON string."""
        data, status, err = _http_get_json(f"{_API_ROOT}/daily_papers")
        results = data if isinstance(data, list) else []
        return json.dumps({"results": results, "status": status, "error": err}, ensure_ascii=False)


class HFRepoInfoTool(Tool):
    """Fetch raw (un-normalized) metadata for a model, dataset or space."""

    name = "hf_repo_info"
    description = (
        "Generic repo info for model|dataset|space by id. Use if you already know the type and want raw item metadata."
    )
    inputs = {
        "repo_type": {"type": "string", "description": "model|dataset|space"},
        "repo_id": {"type": "string", "description": "Owner/name or id"},
    }
    output_type = "string"

    def forward(self, repo_type: str, repo_id: str) -> str:
        """Return the raw API object as JSON, or a 400 payload for a bad type."""
        repo_type = str(repo_type or "").strip().lower()
        if repo_type not in _REPO_KINDS:
            # Same keys as the success payload so callers can parse both alike.
            return json.dumps({"item": {}, "error": "invalid_repo_type", "status": 400})
        endpoint = _REPO_KINDS[repo_type]["endpoint"]
        data, status, err = _http_get_json(
            f"{_API_ROOT}/{endpoint}/{_quote_path(repo_id)}", {"full": "true"}
        )
        item = data if isinstance(data, dict) else {}
        return json.dumps({"item": item, "status": status, "error": err}, ensure_ascii=False)


class HFSiteSearchTool(Tool):
    """Search huggingface.co pages via DuckDuckGo (optional dependency)."""

    name = "hf_site_search"
    description = (
        "Search within huggingface.co for blogs, Learn pages, and posts (DuckDuckGo). Prefer this for tutorials and docs not covered by Hub APIs. "
        "Defaults: limit=10 to reduce rate limiting. Returns JSON with `results`, `status`, and `error`."
    )
    inputs = {
        "query": {"type": "string", "description": "Search query. 'site:huggingface.co' will be added if missing."},
        "limit": {"type": "number", "description": "Max results (default 10)", "nullable": True},
    }
    output_type = "string"

    def forward(self, query: str, limit: Optional[int] = None) -> str:
        """Return site-search hits as a JSON string; failures map to status 500."""
        try:
            # Imported lazily so the other tools work without this package.
            from duckduckgo_search import DDGS
        except Exception:
            return json.dumps({"results": [], "status": 500, "error": "duckduckgo_search_not_installed"})

        query = query or ""
        q = query if "huggingface.co" in query else f"site:huggingface.co {query}"
        lim = _coerce_limit(limit, 10)
        results: List[Dict] = []
        try:
            with DDGS() as ddgs:
                for r in ddgs.text(q, safesearch="moderate", timelimit=None, max_results=lim):
                    if not isinstance(r, dict):
                        continue
                    results.append({
                        "type": "site",
                        "title": r.get("title"),
                        "url": r.get("href"),
                        "snippet": r.get("body"),
                        "date": r.get("date"),
                    })
        except Exception as e:  # tool boundary: report the failure, never raise
            return json.dumps({"results": [], "status": 500, "error": str(e)})
        return json.dumps({"results": results, "status": 200, "error": ""}, ensure_ascii=False)


class HFReportGenerateTool(Tool):
    """Render aggregated results (JSON string) as a standalone HTML report."""

    name = "hf_report_generate"
    description = (
        "Generate a full HTML report from aggregated JSON (string). The app prefers its own dashboard, but this can render custom summaries."
    )
    inputs = {
        "data_json": {"type": "string", "description": "Aggregated search results JSON"},
        "title": {"type": "string", "description": "Report title", "nullable": True},
    }
    output_type = "string"

    # (section heading, accepted keys) in display order.
    _SECTIONS = (
        ("Models", ("models", "Models")),
        ("Datasets", ("datasets", "Datasets")),
        ("Spaces", ("spaces", "Spaces")),
        ("Papers", ("papers", "Papers")),
        ("Daily Papers", ("daily_papers", "DailyPapers")),
        ("Users", ("users", "Users")),
        ("Collections", ("collections", "Collections")),
        ("Site results", ("site", "Site")),
        ("Results", ("results", "Results")),
    )

    def forward(self, data_json: str, title: Optional[str] = None) -> str:
        """Return an HTML document built from ``data_json``.

        Accepts either a dict with category keys or a flat list of items
        (rendered under a single "Results" section). Invalid JSON yields a
        report that shows the parse error instead of raising.
        """
        parse_error = ""
        try:
            data = json.loads(data_json) if data_json else {}
        except (TypeError, ValueError) as e:
            data, parse_error = {}, str(e)
        if isinstance(data, list):
            data = {"results": data}
        elif not isinstance(data, dict):
            data = {}

        title = title or "Hugging Face Research Report"
        parts = [
            f"<h1>{_esc(title)}</h1>",
            "<p class='subtitle'>Generated by Hugging Search</p>",
        ]
        if parse_error:
            parts.append(f"<p class='error'>{_esc(parse_error)}</p>")
        for heading, keys in self._SECTIONS:
            raw = next((data[key] for key in keys if data.get(key)), [])
            rendered = _render_section(heading, _as_item_list(raw), max_desc=220)
            if rendered:
                parts.append(rendered)
        return _render_page(title, "\n".join(parts))


class HFDashboardReportTool(Tool):
    """Search models/datasets/spaces for a query and render an HTML dashboard."""

    name = "hf_generate_dashboard_report"
    description = (
        "One-click dashboard report from a query. Fetches public models/datasets/spaces/daily_papers and returns a full HTML dashboard."
    )
    inputs = {
        "query": {"type": "string", "description": "User intent / keywords to search across Hub"},
        "limit": {"type": "number", "description": "Max results per category (default 20)", "nullable": True},
    }
    output_type = "string"

    def forward(self, query: str, limit: Optional[int] = None) -> str:
        """Return an HTML dashboard; categories that fail to load render empty."""
        lim = _coerce_limit(limit, 20)
        params_common = {"search": query, "sort": "downloads", "direction": "descending", "limit": lim}
        params_spaces = {"search": query, "sort": "likes", "direction": "descending", "limit": lim}

        # Fetch categories (status/error are intentionally ignored here).
        m_data, _, _ = _http_get_json(f"{_API_ROOT}/models", params_common)
        d_data, _, _ = _http_get_json(f"{_API_ROOT}/datasets", params_common)
        s_data, _, _ = _http_get_json(f"{_API_ROOT}/spaces", params_spaces)
        dp_data, _, _ = _http_get_json(f"{_API_ROOT}/daily_papers")

        models = _normalize_repo_list(m_data, "model", lim)
        datasets = _normalize_repo_list(d_data, "dataset", lim)
        spaces = _normalize_repo_list(s_data, "space", lim)
        papers = dp_data if isinstance(dp_data, list) else []

        page_title = "Hugging Search — Dashboard"
        parts = [
            f"<h1>{_esc(page_title)}</h1>",
            f"<p class='query'>Query: {_esc(query)}</p>",
            "<ul class='stats'>",
            f"<li>Models: {len(models)}</li>",
            f"<li>Datasets: {len(datasets)}</li>",
            f"<li>Spaces: {len(spaces)}</li>",
            f"<li>Daily papers: {len(papers)}</li>",
            "</ul>",
        ]
        for heading, items in (("Models", models), ("Datasets", datasets), ("Spaces", spaces)):
            rendered = _render_section(heading, items, max_desc=200)
            if rendered:
                parts.append(rendered)
        return _render_page(page_title, "\n".join(parts))