import gradio as gr
import requests
from bs4 import BeautifulSoup
import json
from typing import List, Dict, Any, Optional
import re
from urllib.parse import urljoin
import time
import functools
import logging
from datetime import datetime, timedelta
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Short class name keeps the generated MCP tool names from getting too long
class HF_API:
def __init__(self):
self.base_url = "https://huggingface.co"
self.docs_url = "https://huggingface.co/docs"
self.api_url = "https://huggingface.co/api"
self.session = requests.Session()
self.session.headers.update({
'User-Agent': 'HF-Info-Server/1.0 (Educational Purpose)',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
})
self.cache = {}
self.cache_ttl = 3600 # 1 hour cache TTL
def _is_cache_valid(self, cache_key: str) -> bool:
if cache_key not in self.cache:
return False
cache_time = self.cache[cache_key].get('timestamp', 0)
return time.time() - cache_time < self.cache_ttl
def _get_from_cache(self, cache_key: str) -> Optional[str]:
if self._is_cache_valid(cache_key):
return self.cache[cache_key]['content']
return None
def _store_in_cache(self, cache_key: str, content: str):
self.cache[cache_key] = {
'content': content,
'timestamp': time.time()
}
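# Illustrative cache round-trip (hypothetical key; real keys are derived from
# the URL hash in _fetch_with_retry below):
#   api = HF_API()
#   api._store_in_cache("url_123", "<html>...</html>")
#   api._get_from_cache("url_123")  # -> "<html>...</html>" while the 1-hour TTL holds
#   # once cache_ttl seconds elapse, the same call returns None and the URL is re-fetched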
def _fetch_with_retry(self, url: str, max_retries: int = 3) -> Optional[str]:
cache_key = f"url_{hash(url)}"
cached_content = self._get_from_cache(cache_key)
if cached_content:
logger.info(f"Cache hit for {url}")
return cached_content
for attempt in range(max_retries):
try:
logger.info(f"Fetching {url} (attempt {attempt + 1})")
response = self.session.get(url, timeout=20)
response.raise_for_status()
content = response.text
self._store_in_cache(cache_key, content)
return content
except requests.exceptions.RequestException as e:
logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
if attempt < max_retries - 1:
time.sleep(2 ** attempt)
else:
logger.error(f"All attempts failed for {url}")
return None
return None
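# Note on the retry schedule above: with max_retries=3 the exponential backoff
# sleeps 2**0 = 1s, then 2**1 = 2s between attempts; after the last failure the
# method returns None so callers can degrade gracefully instead of raising.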
def _extract_code_examples(self, soup: BeautifulSoup) -> List[Dict[str, str]]:
code_blocks = []
code_elements = soup.find_all(['code', 'pre'])
for code_elem in code_elements:
lang_class = code_elem.get('class', [])
language = 'python'
for cls in lang_class:
if 'language-' in str(cls):
language = str(cls).replace('language-', '')
break
elif any(lang in str(cls).lower() for lang in ['python', 'bash', 'javascript', 'json']):
language = str(cls).lower()
break
code_text = code_elem.get_text(strip=True)
if len(code_text) > 20 and any(keyword in code_text.lower() for keyword in ['import', 'from', 'def', 'class', 'pip install', 'transformers']):
code_blocks.append({'code': code_text, 'language': language, 'type': 'usage' if any(word in code_text.lower() for word in ['import', 'load', 'pipeline']) else 'example'})
highlight_blocks = soup.find_all('div', class_=re.compile(r'highlight|code-block|language'))
for block in highlight_blocks:
code_text = block.get_text(strip=True)
if len(code_text) > 20:
code_blocks.append({'code': code_text, 'language': 'python', 'type': 'example'})
seen = set()
unique_blocks = []
for block in code_blocks:
code_hash = hash(block['code'][:100])
if code_hash not in seen:
seen.add(code_hash)
unique_blocks.append(block)
if len(unique_blocks) >= 5:
break
return unique_blocks
def _extract_practical_content(self, soup: BeautifulSoup, topic: str) -> Dict[str, Any]:
content = {'overview': '', 'code_examples': [], 'usage_instructions': [], 'parameters': [], 'methods': [], 'installation': '', 'quickstart': ''}
main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile(r'content|docs|prose'))
if not main_content:
return content
overview_sections = main_content.find_all('p', limit=5)
overview_texts = []
for p in overview_sections:
text = p.get_text(strip=True)
if len(text) > 30 and not text.startswith('Table of contents'):
overview_texts.append(text)
if overview_texts:
overview = ' '.join(overview_texts)
content['overview'] = overview[:1000] + "..." if len(overview) > 1000 else overview
content['code_examples'] = self._extract_code_examples(main_content)
install_headings = main_content.find_all(['h1', 'h2', 'h3', 'h4'], string=re.compile(r'install|setup|getting started', re.IGNORECASE))
for heading in install_headings:
next_elem = heading.find_next_sibling()
install_text = []
while next_elem and next_elem.name not in ['h1', 'h2', 'h3', 'h4'] and len(install_text) < 3:
if next_elem.name in ['p', 'pre', 'code']:
text = next_elem.get_text(strip=True)
if text and len(text) > 10:
install_text.append(text)
next_elem = next_elem.find_next_sibling()
if install_text:
content['installation'] = ' '.join(install_text)
break
usage_headings = main_content.find_all(['h1', 'h2', 'h3', 'h4'])
for heading in usage_headings:
heading_text = heading.get_text(strip=True).lower()
if any(keyword in heading_text for keyword in ['usage', 'example', 'how to', 'quickstart', 'getting started']):
next_elem = heading.find_next_sibling()
instruction_parts = []
while next_elem and next_elem.name not in ['h1', 'h2', 'h3', 'h4']:
if next_elem.name in ['p', 'li', 'div', 'ol', 'ul']:
text = next_elem.get_text(strip=True)
if text and len(text) > 15:
instruction_parts.append(text)
next_elem = next_elem.find_next_sibling()
if len(instruction_parts) >= 5:
break
if instruction_parts:
content['usage_instructions'].extend(instruction_parts)
tables = main_content.find_all('table')
for table in tables:
headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]
if any(keyword in ' '.join(headers) for keyword in ['parameter', 'argument', 'option', 'attribute', 'name', 'type']):
rows = table.find_all('tr')[1:]
for row in rows[:8]:
cells = [td.get_text(strip=True) for td in row.find_all('td')]
if len(cells) >= 2:
param_info = {'name': cells[0], 'description': cells[1] if len(cells) > 1 else '', 'type': cells[2] if len(cells) > 2 else '', 'default': cells[3] if len(cells) > 3 else ''}
content['parameters'].append(param_info)
return content
def search_documentation(self, query: str, max_results: int = 3) -> str:
"""
Searches the official Hugging Face documentation for a specific topic and returns a summary.
This tool is useful for finding how-to guides, explanations of concepts like 'pipeline' or 'tokenizer', and usage examples.
Args:
query (str): The topic or keyword to search for in the documentation (e.g., 'fine-tuning', 'peft', 'datasets').
max_results (int): The maximum number of documentation pages to retrieve and summarize. Defaults to 3.
"""
try:
max_results = int(max_results) if str(max_results).isdigit() else 3
max_results = min(max(max_results, 1), 5)
query_lower = query.lower().strip()
if not query_lower:
return "Please provide a search query."
doc_sections = {
'transformers': {'base_url': 'https://huggingface.co/docs/transformers', 'topics': {'pipeline': '/main_classes/pipelines', 'tokenizer': '/main_classes/tokenizer', 'trainer': '/main_classes/trainer', 'model': '/main_classes/model', 'quicktour': '/quicktour', 'installation': '/installation', 'fine-tuning': '/training', 'training': '/training', 'inference': '/main_classes/pipelines', 'preprocessing': '/preprocessing', 'tutorial': '/tutorials', 'configuration': '/main_classes/configuration', 'peft': '/peft', 'lora': '/peft', 'quantization': '/main_classes/quantization', 'generation': '/main_classes/text_generation', 'optimization': '/perf_train_gpu_one', 'deployment': '/deployment', 'custom': '/custom_models'}},
'datasets': {'base_url': 'https://huggingface.co/docs/datasets', 'topics': {'loading': '/load_hub', 'load': '/load_hub', 'processing': '/process', 'streaming': '/stream', 'audio': '/audio_process', 'image': '/image_process', 'text': '/nlp_process', 'arrow': '/about_arrow', 'cache': '/cache', 'upload': '/upload_dataset', 'custom': '/dataset_script'}},
'diffusers': {'base_url': 'https://huggingface.co/docs/diffusers', 'topics': {'pipeline': '/using-diffusers/loading', 'stable diffusion': '/using-diffusers/stable_diffusion', 'controlnet': '/using-diffusers/controlnet', 'inpainting': '/using-diffusers/inpaint', 'training': '/training/overview', 'optimization': '/optimization/fp16', 'schedulers': '/using-diffusers/schedulers'}},
'hub': {'base_url': 'https://huggingface.co/docs/hub', 'topics': {'repositories': '/repositories', 'git': '/repositories-getting-started', 'spaces': '/spaces', 'models': '/models', 'datasets': '/datasets'}}
}
relevant_urls = []
for section_name, section_data in doc_sections.items():
base_url = section_data['base_url']
topics = section_data['topics']
for topic, path in topics.items():
relevance = 0
if query_lower == topic.lower(): relevance = 1.0
elif query_lower in topic.lower(): relevance = 0.9
elif any(word in topic.lower() for word in query_lower.split()): relevance = 0.7
elif any(word in query_lower for word in topic.lower().split()): relevance = 0.6
if relevance > 0:
full_url = base_url + path
relevant_urls.append({'url': full_url, 'topic': topic, 'section': section_name, 'relevance': relevance})
relevant_urls.sort(key=lambda x: x['relevance'], reverse=True)
relevant_urls = relevant_urls[:max_results]
if not relevant_urls:
return f"β No documentation found for '{query}'. Try: pipeline, tokenizer, trainer, model, fine-tuning, datasets, diffusers, or peft."
result = f"# π Hugging Face Documentation: {query}\n\n"
for i, url_info in enumerate(relevant_urls, 1):
section_emoji = {'transformers': 'π€', 'datasets': 'π', 'diffusers': 'π¨', 'hub': 'π'}.get(url_info['section'], 'π')
result += f"## {i}. {section_emoji} {url_info['topic'].title()} ({url_info['section'].title()})\n\n"
content = self._fetch_with_retry(url_info['url'])
if content:
soup = BeautifulSoup(content, 'html.parser')
practical_content = self._extract_practical_content(soup, url_info['topic'])
if practical_content['overview']: result += f"**📋 Overview:**\n{practical_content['overview']}\n\n"
if practical_content['installation']: result += f"**⚙️ Installation:**\n{practical_content['installation']}\n\n"
if practical_content['code_examples']:
result += "**💻 Code Examples:**\n\n"
for j, code_block in enumerate(practical_content['code_examples'][:3], 1):
lang = code_block.get('language', 'python')
code_type = code_block.get('type', 'example')
result += f"*{code_type.title()} {j}:*\n```{lang}\n{code_block['code']}\n```\n\n"
if practical_content['usage_instructions']:
result += "**🛠️ Usage Instructions:**\n"
for idx, instruction in enumerate(practical_content['usage_instructions'][:4], 1):
result += f"{idx}. {instruction}\n"
result += "\n"
if practical_content['parameters']:
result += "**⚙️ Parameters:**\n"
for param in practical_content['parameters'][:6]:
param_type = f" (`{param['type']}`)" if param.get('type') else ""
default_val = f" *Default: {param['default']}*" if param.get('default') else ""
result += f"• **{param['name']}**{param_type}: {param['description']}{default_val}\n"
result += "\n"
result += f"**🔗 Full Documentation:** {url_info['url']}\n\n"
else:
result += f"⚠️ Could not fetch content. Visit directly: {url_info['url']}\n\n"
result += "---\n\n"
return result
except Exception as e:
logger.error(f"Error in search_documentation: {e}")
return f"β Error searching documentation: {str(e)}\n\nTry a simpler search term or check your internet connection."
def get_model_info(self, model_name: str) -> str:
"""
Fetches comprehensive information about a specific model from the Hugging Face Hub.
Provides statistics like downloads and likes, a description, usage examples, and a quick-start code snippet.
Args:
model_name (str): The full identifier of the model on the Hub, such as 'bert-base-uncased' or 'meta-llama/Llama-2-7b-hf'.
"""
try:
model_name = model_name.strip()
if not model_name: return "Please provide a model name."
api_url = f"{self.api_url}/models/{model_name}"
response = self.session.get(api_url, timeout=15)
if response.status_code == 404: return f"❌ Model '{model_name}' not found. Please check the model name."
elif response.status_code != 200: return f"❌ Error fetching model info (Status: {response.status_code})"
model_data = response.json()
result = f"# 🤖 Model: {model_name}\n\n"
downloads = model_data.get('downloads', 0)
likes = model_data.get('likes', 0)
task = model_data.get('pipeline_tag', 'N/A')
library = model_data.get('library_name', 'N/A')
result += f"**📊 Statistics:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes:,}\n• **Task:** {task}\n• **Library:** {library}\n• **Created:** {model_data.get('createdAt', 'N/A')[:10]}\n• **Updated:** {model_data.get('lastModified', 'N/A')[:10]}\n\n"
if 'tags' in model_data and model_data['tags']: result += f"**🏷️ Tags:** {', '.join(model_data['tags'][:10])}\n\n"
model_url = f"{self.base_url}/{model_name}"
page_content = self._fetch_with_retry(model_url)
if page_content:
soup = BeautifulSoup(page_content, 'html.parser')
readme_content = soup.find('div', class_=re.compile(r'prose|readme|model-card'))
if readme_content:
paragraphs = readme_content.find_all('p')[:3]
description_parts = []
for p in paragraphs:
text = p.get_text(strip=True)
if len(text) > 30 and not any(skip in text.lower() for skip in ['table of contents', 'toc']):
description_parts.append(text)
if description_parts:
description = ' '.join(description_parts)
result += f"**π Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
code_examples = self._extract_code_examples(soup)
if code_examples:
result += "**π» Usage Examples:**\n\n"
for i, code_block in enumerate(code_examples[:3], 1):
lang = code_block.get('language', 'python')
result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
if task and task != 'N/A':
result += f"**π Quick Start Template:**\n"
if library == 'transformers':
result += f"```python\nfrom transformers import pipeline\n\n# Load the model\nmodel = pipeline('{task}', model='{model_name}')\n\n# Use the model\n# result = model(your_input_here)\n# print(result)\n```\n\n"
else:
result += f"```python\n# Load and use {model_name}\n# Refer to the documentation for specific usage\n```\n\n"
if 'siblings' in model_data:
files = [f['rfilename'] for f in model_data['siblings'][:10]]
if files:
result += f"**π Model Files:** {', '.join(files)}\n\n"
result += f"**π Model Page:** {model_url}\n"
return result
except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
except Exception as e:
logger.error(f"Error in get_model_info: {e}")
return f"❌ Error fetching model info: {str(e)}"
def get_dataset_info(self, dataset_name: str) -> str:
"""
Retrieves detailed information about a specific dataset from the Hugging Face Hub.
Includes statistics, a description, and a quick-start code snippet showing how to load the dataset.
Args:
dataset_name (str): The full identifier of the dataset on the Hub, for example 'squad' or 'imdb'.
"""
try:
dataset_name = dataset_name.strip()
if not dataset_name: return "Please provide a dataset name."
api_url = f"{self.api_url}/datasets/{dataset_name}"
response = self.session.get(api_url, timeout=15)
if response.status_code == 404: return f"❌ Dataset '{dataset_name}' not found. Please check the dataset name."
elif response.status_code != 200: return f"❌ Error fetching dataset info (Status: {response.status_code})"
dataset_data = response.json()
result = f"# 📊 Dataset: {dataset_name}\n\n"
downloads = dataset_data.get('downloads', 0)
likes = dataset_data.get('likes', 0)
result += f"**📊 Statistics:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes:,}\n• **Created:** {dataset_data.get('createdAt', 'N/A')[:10]}\n• **Updated:** {dataset_data.get('lastModified', 'N/A')[:10]}\n\n"
if 'tags' in dataset_data and dataset_data['tags']: result += f"**🏷️ Tags:** {', '.join(dataset_data['tags'][:10])}\n\n"
dataset_url = f"{self.base_url}/datasets/{dataset_name}"
page_content = self._fetch_with_retry(dataset_url)
if page_content:
soup = BeautifulSoup(page_content, 'html.parser')
readme_content = soup.find('div', class_=re.compile(r'prose|readme|dataset-card'))
if readme_content:
paragraphs = readme_content.find_all('p')[:3]
description_parts = []
for p in paragraphs:
text = p.get_text(strip=True)
if len(text) > 30: description_parts.append(text)
if description_parts:
description = ' '.join(description_parts)
result += f"**π Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
code_examples = self._extract_code_examples(soup)
if code_examples:
result += "**π» Usage Examples:**\n\n"
for i, code_block in enumerate(code_examples[:3], 1):
lang = code_block.get('language', 'python')
result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
result += f"**π Quick Start Template:**\n"
result += f"```python\nfrom datasets import load_dataset\n\n# Load the dataset\ndataset = load_dataset('{dataset_name}')\n\n# Explore the dataset\n# print(dataset)\n# print(f\"Dataset keys: {{list(dataset.keys())}}\")\n\n# Access first example\n# if 'train' in dataset:\n# print(\"First example:\")\n# print(dataset['train'][0])\n```\n\n"
result += f"**π Dataset Page:** {dataset_url}\n"
return result
except requests.exceptions.RequestException as e: return f"β Network error: {str(e)}"
except Exception as e:
logger.error(f"Error in get_dataset_info: {e}")
return f"β Error fetching dataset info: {str(e)}"
def search_models(self, task: str, limit: str = "5") -> str:
"""
Searches the Hugging Face Hub for models based on a specified task or keyword and returns a list of top models.
Each result includes statistics and a quick usage example.
Args:
task (str): The task to search for, such as 'text-classification', 'image-generation', or 'question-answering'.
limit (str): The maximum number of models to return. Defaults to '5'.
"""
try:
task = task.strip()
if not task: return "Please provide a search task or keyword."
limit = int(limit) if str(limit).isdigit() else 5
limit = min(max(limit, 1), 10)
params = {'search': task, 'limit': limit * 3, 'sort': 'downloads', 'direction': -1}
response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
response.raise_for_status()
models = response.json()
if not models: return f"β No models found for task: '{task}'. Try different keywords."
filtered_models = []
for model in models:
if (model.get('downloads', 0) > 0 or model.get('likes', 0) > 0 or 'pipeline_tag' in model):
filtered_models.append(model)
if len(filtered_models) >= limit: break
if not filtered_models: filtered_models = models[:limit]
result = f"# π Top {len(filtered_models)} Models for '{task}'\n\n"
for i, model in enumerate(filtered_models, 1):
model_id = model.get('id', 'Unknown')
downloads = model.get('downloads', 0)
likes = model.get('likes', 0)
task_type = model.get('pipeline_tag', 'N/A')
library = model.get('library_name', 'N/A')
quality_score = ""
if downloads > 10000: quality_score = "⭐ Popular"
elif downloads > 1000: quality_score = "🔥 Active"
elif likes > 10: quality_score = "👍 Liked"
result += f"## {i}. {model_id} {quality_score}\n\n"
result += f"**📊 Stats:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes}\n• **Task:** {task_type}\n• **Library:** {library}\n\n"
if task_type and task_type != 'N/A':
result += f"**🚀 Quick Usage:**\n"
if library == 'transformers':
result += f"```python\nfrom transformers import pipeline\n\n# Load model\nmodel = pipeline('{task_type}', model='{model_id}')\n\n# Use model\n# result = model(\"Your input here\")\n# print(result)\n```\n\n"
else:
result += f"```python\n# Load and use {model_id}\n# Check model page for specific usage instructions\n```\n\n"
result += f"**π Model Page:** {self.base_url}/{model_id}\n\n---\n\n"
return result
except requests.exceptions.RequestException as e: return f"❌ Network error: {str(e)}"
except Exception as e:
logger.error(f"Error in search_models: {e}")
return f"❌ Error searching models: {str(e)}"
def get_transformers_docs(self, topic: str) -> str:
"""
Fetches detailed documentation specifically for the Hugging Face Transformers library on a given topic.
This provides in-depth explanations, code examples, and parameter descriptions for core library components.
Args:
topic (str): The Transformers library topic to look up, such as 'pipeline', 'tokenizer', 'trainer', or 'generation'.
"""
try:
topic = topic.strip().lower()
if not topic: return "Please provide a topic to search for."
docs_url = "https://huggingface.co/docs/transformers"
topic_map = {'pipeline': f"{docs_url}/main_classes/pipelines", 'pipelines': f"{docs_url}/main_classes/pipelines", 'tokenizer': f"{docs_url}/main_classes/tokenizer", 'tokenizers': f"{docs_url}/main_classes/tokenizer", 'trainer': f"{docs_url}/main_classes/trainer", 'training': f"{docs_url}/training", 'model': f"{docs_url}/main_classes/model", 'models': f"{docs_url}/main_classes/model", 'configuration': f"{docs_url}/main_classes/configuration", 'config': f"{docs_url}/main_classes/configuration", 'quicktour': f"{docs_url}/quicktour", 'quick': f"{docs_url}/quicktour", 'installation': f"{docs_url}/installation", 'install': f"{docs_url}/installation", 'tutorial': f"{docs_url}/tutorials", 'tutorials': f"{docs_url}/tutorials", 'generation': f"{docs_url}/main_classes/text_generation", 'text_generation': f"{docs_url}/main_classes/text_generation", 'preprocessing': f"{docs_url}/preprocessing", 'preprocess': f"{docs_url}/preprocessing", 'peft': f"{docs_url}/peft", 'lora': f"{docs_url}/peft", 'quantization': f"{docs_url}/main_classes/quantization", 'optimization': f"{docs_url}/perf_train_gpu_one", 'performance': f"{docs_url}/perf_train_gpu_one", 'deployment': f"{docs_url}/deployment", 'custom': f"{docs_url}/custom_models", 'fine-tuning': f"{docs_url}/training", 'finetuning': f"{docs_url}/training"}
url = topic_map.get(topic)
if not url:
for key, value in topic_map.items():
if topic in key or key in topic:
url = value
topic = key
break
if not url:
url = f"{docs_url}/quicktour"
topic = "quicktour"
content = self._fetch_with_retry(url)
if not content: return f"❌ Could not fetch documentation for '{topic}'. Please try again or visit: {url}"
soup = BeautifulSoup(content, 'html.parser')
practical_content = self._extract_practical_content(soup, topic)
result = f"# π Transformers Documentation: {topic.replace('_', ' ').title()}\n\n"
if practical_content['overview']: result += f"**π Overview:**\n{practical_content['overview']}\n\n"
if practical_content['installation']: result += f"**βοΈ Installation:**\n{practical_content['installation']}\n\n"
if practical_content['code_examples']:
result += "**π» Code Examples:**\n\n"
for i, code_block in enumerate(practical_content['code_examples'][:4], 1):
lang = code_block.get('language', 'python')
code_type = code_block.get('type', 'example')
result += f"### {code_type.title()} {i}:\n```{lang}\n{code_block['code']}\n```\n\n"
if practical_content['usage_instructions']:
result += "**π οΈ Step-by-Step Usage:**\n"
for i, instruction in enumerate(practical_content['usage_instructions'][:6], 1):
result += f"{i}. {instruction}\n"
result += "\n"
if practical_content['parameters']:
result += "**βοΈ Key Parameters:**\n"
for param in practical_content['parameters'][:10]:
param_type = f" (`{param['type']}`)" if param.get('type') else ""
default_val = f" *Default: `{param['default']}`*" if param.get('default') else ""
result += f"β’ **`{param['name']}`**{param_type}: {param['description']}{default_val}\n"
result += "\n"
related_topics = [k for k in topic_map.keys() if k != topic][:5]
if related_topics: result += f"**🔍 Related Topics:** {', '.join(related_topics)}\n\n"
result += f"**🔗 Full Documentation:** {url}\n"
return result
except Exception as e:
logger.error(f"Error in get_transformers_docs: {e}")
return f"β Error fetching Transformers documentation: {str(e)}"
def get_trending_models(self, limit: str = "10") -> str:
"""
Fetches a list of the most downloaded models currently trending on the Hugging Face Hub.
This is useful for discovering popular and widely-used models.
Args:
limit (str): The number of trending models to return. Defaults to '10'.
"""
try:
limit = int(limit) if str(limit).isdigit() else 10
limit = min(max(limit, 1), 20)
params = {'sort': 'downloads', 'direction': -1, 'limit': limit}
response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
response.raise_for_status()
models = response.json()
if not models: return "❌ Could not fetch trending models."
result = f"# 🔥 Trending Models (Top {len(models)})\n\n"
for i, model in enumerate(models, 1):
model_id = model.get('id', 'Unknown')
downloads = model.get('downloads', 0)
likes = model.get('likes', 0)
task = model.get('pipeline_tag', 'N/A')
if downloads > 1000000: trend = "🚀 Mega Popular"
elif downloads > 100000: trend = "🔥 Very Popular"
elif downloads > 10000: trend = "⭐ Popular"
else: trend = "📈 Trending"
result += f"## {i}. {model_id} {trend}\n"
result += f"• **Downloads:** {downloads:,} | **Likes:** {likes} | **Task:** {task}\n"
result += f"• **Link:** {self.base_url}/{model_id}\n\n"
return result
except Exception as e:
logger.error(f"Error in get_trending_models: {e}")
return f"β Error fetching trending models: {str(e)}"
# Initialize the API server
hf_api = HF_API()
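# Optional smoke test (commented out; hypothetical calls, require network access):
# print(hf_api.get_trending_models("3")[:400])
# print(hf_api.search_documentation("pipeline", max_results=1)[:400])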
# --- Named Functions for Gradio UI ---
def clear_output():
"""Clears a Gradio output component."""
return ""
def set_textbox_value(text):
"""Sets a Gradio Textbox to a specific value."""
return text
# --- Doc Search Tab Functions ---
def run_doc_search(query, max_results):
# gr.Number yields a float (e.g. 2.0), so str(...).isdigit() was always False; coerce via float
return hf_api.search_documentation(query, int(float(max_results or 2)))
# --- Model Info Tab Functions ---
def run_model_info(model_name):
return hf_api.get_model_info(model_name)
# --- Dataset Info Tab Functions ---
def run_dataset_info(dataset_name):
return hf_api.get_dataset_info(dataset_name)
# --- Model Search Tab Functions ---
def run_model_search(task, limit):
# gr.Number yields a float, so coerce via float before int
return hf_api.search_models(task, int(float(limit or 5)))
# --- Transformers Docs Tab Functions ---
def run_transformers_docs(topic):
return hf_api.get_transformers_docs(topic)
# --- Trending Models Tab Functions ---
def run_trending_models(limit):
return hf_api.get_trending_models(int(float(limit or 10)))
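# Named module-level wrappers (rather than lambdas) give each event handler a
# stable, descriptive function name, which is presumably what the MCP layer
# surfaces as the tool name.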
# --- Create Gradio Interface ---
with gr.Blocks(
title="π€ Hugging Face Information Server",
theme=gr.themes.Soft(),
css="""
.gradio-container { font-family: 'Inter', sans-serif; }
.main-header { text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px; }
""") as demo:
# Header
with gr.Row():
gr.HTML("""
<div class="main-header">
<h1>🤗 Hugging Face Information Server</h1>
<p>Get comprehensive documentation with <strong>real code examples</strong>, <strong>usage instructions</strong>, and <strong>practical content</strong></p>
</div>
""")
with gr.Tab("π Documentation Search", elem_id="docs"):
gr.Markdown("### Search for documentation with **comprehensive code examples** and **step-by-step instructions**")
with gr.Row():
with gr.Column(scale=3):
doc_query = gr.Textbox(label="🔍 Search Query", placeholder="e.g., tokenizer, pipeline, fine-tuning, peft, trainer, quantization")
with gr.Column(scale=1):
doc_max_results = gr.Number(label="Max Results", value=2, minimum=1, maximum=5)
doc_output = gr.Textbox(label="📖 Documentation with Examples", lines=25, max_lines=30)
with gr.Row():
doc_btn = gr.Button("🔍 Search Documentation", variant="primary", size="lg")
doc_clear = gr.Button("🗑️ Clear", variant="secondary")
gr.Markdown("**Quick Examples:**")
with gr.Row():
gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=doc_query)
gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=doc_query)
gr.Button("Fine-tuning", size="sm").click(functools.partial(set_textbox_value, "fine-tuning"), outputs=doc_query)
gr.Button("PEFT", size="sm").click(functools.partial(set_textbox_value, "peft"), outputs=doc_query)
doc_btn.click(run_doc_search, inputs=[doc_query, doc_max_results], outputs=doc_output)
doc_clear.click(clear_output, outputs=doc_output)
with gr.Tab("π€ Model Information", elem_id="models"):
gr.Markdown("### Get detailed model information with **usage examples** and **code snippets**")
model_name = gr.Textbox(label="π€ Model Name", placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium, meta-llama/Llama-2-7b-hf")
model_output = gr.Textbox(label="π Model Information + Usage Examples", lines=25, max_lines=30)
with gr.Row():
model_btn = gr.Button("π Get Model Info", variant="primary", size="lg")
model_clear = gr.Button("ποΈ Clear", variant="secondary")
gr.Markdown("**Popular Models:**")
with gr.Row():
gr.Button("BERT", size="sm").click(functools.partial(set_textbox_value, "bert-base-uncased"), outputs=model_name)
gr.Button("GPT-2", size="sm").click(functools.partial(set_textbox_value, "gpt2"), outputs=model_name)
gr.Button("T5", size="sm").click(functools.partial(set_textbox_value, "t5-small"), outputs=model_name)
gr.Button("DistilBERT", size="sm").click(functools.partial(set_textbox_value, "distilbert-base-uncased"), outputs=model_name)
model_btn.click(run_model_info, inputs=model_name, outputs=model_output)
model_clear.click(clear_output, outputs=model_output)
with gr.Tab("π Dataset Information", elem_id="datasets"):
gr.Markdown("### Get dataset information with **loading examples** and **usage code**")
dataset_name = gr.Textbox(label="π Dataset Name", placeholder="e.g., squad, imdb, glue, common_voice, wikitext")
dataset_output = gr.Textbox(label="π Dataset Information + Usage Examples", lines=25, max_lines=30)
with gr.Row():
dataset_btn = gr.Button("π Get Dataset Info", variant="primary", size="lg")
dataset_clear = gr.Button("ποΈ Clear", variant="secondary")
gr.Markdown("**Popular Datasets:**")
with gr.Row():
gr.Button("SQuAD", size="sm").click(functools.partial(set_textbox_value, "squad"), outputs=dataset_name)
gr.Button("IMDB", size="sm").click(functools.partial(set_textbox_value, "imdb"), outputs=dataset_name)
gr.Button("GLUE", size="sm").click(functools.partial(set_textbox_value, "glue"), outputs=dataset_name)
gr.Button("Common Voice", size="sm").click(functools.partial(set_textbox_value, "common_voice"), outputs=dataset_name)
dataset_btn.click(run_dataset_info, inputs=dataset_name, outputs=dataset_output)
dataset_clear.click(clear_output, outputs=dataset_output)
with gr.Tab("π Model Search", elem_id="search"):
gr.Markdown("### Search models with **quick usage examples** and **quality indicators**")
with gr.Row():
with gr.Column(scale=3):
search_task = gr.Textbox(label="π Task or Keyword", placeholder="e.g., text-classification, image-generation, question-answering, sentiment-analysis")
with gr.Column(scale=1):
search_limit = gr.Number(label="Max Results", value=5, minimum=1, maximum=10)
search_output = gr.Textbox(label="π Models with Usage Examples", lines=25, max_lines=30)
with gr.Row():
search_btn = gr.Button("π Search Models", variant="primary", size="lg")
search_clear = gr.Button("ποΈ Clear", variant="secondary")
gr.Markdown("**Popular Tasks:**")
with gr.Row():
gr.Button("Text Classification", size="sm").click(functools.partial(set_textbox_value, "text-classification"), outputs=search_task)
gr.Button("Question Answering", size="sm").click(functools.partial(set_textbox_value, "question-answering"), outputs=search_task)
gr.Button("Text Generation", size="sm").click(functools.partial(set_textbox_value, "text-generation"), outputs=search_task)
gr.Button("Image Classification", size="sm").click(functools.partial(set_textbox_value, "image-classification"), outputs=search_task)
search_btn.click(run_model_search, inputs=[search_task, search_limit], outputs=search_output)
search_clear.click(clear_output, outputs=search_output)
with gr.Tab("β‘ Transformers Docs", elem_id="transformers"):
gr.Markdown("### Get comprehensive Transformers documentation with **detailed examples** and **parameters**")
transformers_topic = gr.Textbox(label="π Topic", placeholder="e.g., pipeline, tokenizer, trainer, model, peft, generation, quantization")
transformers_output = gr.Textbox(label="π Comprehensive Documentation", lines=25, max_lines=30)
with gr.Row():
transformers_btn = gr.Button("π Get Documentation", variant="primary", size="lg")
transformers_clear = gr.Button("ποΈ Clear", variant="secondary")
gr.Markdown("**Core Topics:**")
with gr.Row():
gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=transformers_topic)
gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=transformers_topic)
gr.Button("Trainer", size="sm").click(functools.partial(set_textbox_value, "trainer"), outputs=transformers_topic)
gr.Button("Generation", size="sm").click(functools.partial(set_textbox_value, "generation"), outputs=transformers_topic)
transformers_btn.click(run_transformers_docs, inputs=transformers_topic, outputs=transformers_output)
transformers_clear.click(clear_output, outputs=transformers_output)
with gr.Tab("π₯ Trending Models", elem_id="trending"):
gr.Markdown("### Discover the most popular and trending models")
trending_limit = gr.Number(label="Number of Models", value=10, minimum=1, maximum=20)
trending_output = gr.Textbox(label="π₯ Trending Models", lines=20, max_lines=25)
with gr.Row():
trending_btn = gr.Button("π₯ Get Trending Models", variant="primary", size="lg")
trending_clear = gr.Button("ποΈ Clear", variant="secondary")
trending_btn.click(run_trending_models, inputs=trending_limit, outputs=trending_output)
trending_clear.click(clear_output, outputs=trending_output)
# Footer
with gr.Row():
gr.HTML("""
<div style="text-align: center; padding: 20px; color: #666;">
<h3>💡 Features</h3>
<p><strong>✅ Real code examples</strong> • <strong>✅ Step-by-step instructions</strong> • <strong>✅ Parameter documentation</strong> • <strong>✅ Quality indicators</strong></p>
<p><em>Get practical, actionable information directly from the source.</em></p>
<p><a href="https://huggingface.co/spaces/Agents-MCP-Hackathon/HuggingFaceDoc/blob/main/README.md" target="_blank">📖 Read the Guide on Hugging Face Spaces</a></p>
</div>
""")
if __name__ == "__main__":
print("π Starting Hugging Face Information Server...")
print("π Features: Code examples, usage instructions, comprehensive documentation")
# Kept your original launch parameters
demo.launch(
mcp_server=True
) |