# hmcp / app.py — uploaded by adowu (commit 1d0f83d, "Update app.py")
#!/usr/bin/env python3
"""
HuggingMCP - Enhanced Hugging Face MCP Server
Optimized with 11 main commands and enhanced debugging
"""
import os
import sys
import logging
import traceback
import time
from typing import Dict, Any, Optional, Union, List
# Enhanced stderr debugging
def debug_stderr(message: str, level: str = "INFO") -> None:
    """Write a timestamped debug line to stderr, flushed immediately.

    stderr is used because stdout is reserved for the MCP protocol stream.
    """
    now = time.strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{now}] [{level}] HuggingMCP: {message}"
    print(line, file=sys.stderr, flush=True)
# Startup debugging -- emitted before logging is configured so that early
# failures are still visible on stderr.
debug_stderr("?? HuggingMCP server starting up...")
debug_stderr(f"Python executable: {sys.executable}")
debug_stderr(f"Script path: {__file__}")
debug_stderr(f"Working directory: {os.getcwd()}")
# Setup enhanced logging: DEBUG-level output goes both to stderr and to a
# persistent file under /tmp for post-mortem inspection.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stderr),
        # NOTE(review): raises at import time if /tmp is not writable
        # (e.g. Windows) -- confirm the deployment targets are POSIX.
        logging.FileHandler('/tmp/hugmcp_debug.log', mode='a')
    ]
)
logger = logging.getLogger(__name__)
# Import MCP with error handling: the server cannot run without it, so a
# missing package is fatal (exit 1) with an install hint.
try:
    from mcp.server.fastmcp import FastMCP
    debug_stderr("? MCP imported successfully")
except ImportError as e:
    debug_stderr(f"? MCP import failed: {e}", "ERROR")
    logger.error(f"MCP not installed. Run: pip3 install 'mcp[cli]' - Error: {e}")
    sys.exit(1)
# Import Hugging Face with error handling. Core names are mandatory;
# each optional feature group sets a HAS_* flag so tools can degrade
# gracefully on older huggingface_hub versions.
try:
    from huggingface_hub import (
        HfApi, create_repo, upload_file, delete_repo, delete_file,
        list_models, list_datasets, list_spaces, model_info, dataset_info,
        hf_hub_download, login, logout, whoami, create_collection,
        get_collection, add_collection_item, create_discussion,
        get_repo_discussions, get_discussion_details, CommitOperationAdd, CommitOperationDelete
    )
    debug_stderr("? Hugging Face Hub core imports successful")
    # Optional imports for advanced features
    try:
        from huggingface_hub import list_liked_repos, list_repo_refs, list_repo_commits
        HAS_ADVANCED_REPO = True
        debug_stderr("? Advanced repository features available")
    except ImportError:
        HAS_ADVANCED_REPO = False
        debug_stderr("?? Advanced repository features not available", "WARN")
    try:
        from huggingface_hub import get_space_runtime, restart_space, pause_space, set_space_sleep_time, duplicate_space
        HAS_SPACE_MANAGEMENT = True
        debug_stderr("? Space management features available")
    except ImportError:
        HAS_SPACE_MANAGEMENT = False
        debug_stderr("?? Space management features not available", "WARN")
    try:
        from huggingface_hub import create_branch, delete_branch
        HAS_BRANCH_MANAGEMENT = True
        debug_stderr("? Branch management features available")
    except ImportError:
        HAS_BRANCH_MANAGEMENT = False
        debug_stderr("?? Branch management features not available", "WARN")
    try:
        from huggingface_hub import get_inference_api
        HAS_INFERENCE_API = True
        debug_stderr("? Inference API features available")
    except ImportError:
        HAS_INFERENCE_API = False
        debug_stderr("?? Inference API features not available", "WARN")
    debug_stderr("? Hugging Face Hub imported successfully")
except ImportError as e:
    debug_stderr(f"? Hugging Face Hub import failed: {e}", "ERROR")
    logger.error(f"huggingface_hub not installed. Run: pip3 install huggingface_hub - Error: {e}")
    sys.exit(1)
# Standard-library imports used by the tool implementations below.
# These are all stdlib, so the except branch should never fire in practice;
# it is kept as a defensive guard.
try:
    import json
    import re
    import base64
    import hashlib
    import urllib.parse
    from datetime import datetime, timedelta
    from collections import defaultdict
    from pathlib import Path
    debug_stderr("? Standard libraries imported successfully")
except ImportError as e:
    debug_stderr(f"?? Some standard libraries missing: {e}", "WARN")
# Initialize MCP server with metadata.  Version detection and the
# `description` kwarg are both best-effort: older mcp releases lack
# importlib metadata entries and/or the description parameter.
try:
    try:
        from importlib.metadata import PackageNotFoundError, version  # type: ignore
    except Exception:
        # Pre-3.8 fallback: disable version lookup entirely.
        PackageNotFoundError = Exception  # type: ignore
        version = None  # type: ignore
    try:
        mcp_version = version("mcp") if version else None
    except PackageNotFoundError:
        mcp_version = None
    except Exception:
        mcp_version = None
    if mcp_version:
        debug_stderr(f"mcp package version: {mcp_version}")
    _server_description = "Advanced Hugging Face MCP Server with 18+ comprehensive tools for ML workflows"
    try:
        mcp = FastMCP("HuggingMCP", description=_server_description)
    except TypeError as e:
        # Older FastMCP constructors do not accept `description`.
        if "description" in str(e):
            mcp = FastMCP("HuggingMCP")
        else:
            raise
    debug_stderr("? FastMCP server initialized")
except Exception as e:
    debug_stderr(f"? FastMCP initialization failed: {e}", "ERROR")
    logger.error(f"Failed to initialize FastMCP: {e}")
    sys.exit(1)
# Configuration from environment variables.  Boolean flags accept "true"
# (case-insensitive); anything else is treated as false.
TOKEN = os.getenv("HF_TOKEN")
READ_ONLY = os.getenv("HF_READ_ONLY", "false").lower() == "true"
ADMIN_MODE = os.getenv("HF_ADMIN_MODE", "false").lower() == "true"
MAX_FILE_SIZE = int(os.getenv("HF_MAX_FILE_SIZE", "104857600"))  # 100MB default
INFERENCE_TIMEOUT = int(os.getenv("HF_INFERENCE_TIMEOUT", "30"))  # 30 seconds default
ENABLE_INFERENCE = os.getenv("HF_ENABLE_INFERENCE", "true").lower() == "true"
CACHE_ENABLED = os.getenv("HF_CACHE_ENABLED", "true").lower() == "true"
debug_stderr(f"Configuration loaded - Token: {'?' if TOKEN else '?'}, Read-only: {READ_ONLY}, Admin: {ADMIN_MODE}")
# Initialize the shared HfApi client (anonymous when no token is set).
try:
    api = HfApi(token=TOKEN) if TOKEN else HfApi()
    debug_stderr("? HfApi initialized successfully")
except Exception as e:
    debug_stderr(f"? HfApi initialization failed: {e}", "ERROR")
    logger.error(f"Failed to initialize HfApi: {e}")
    sys.exit(1)
logger.info(f"?? HuggingMCP initialized - Token: {'?' if TOKEN else '?'}, Admin: {ADMIN_MODE}")
# =============================================================================
# HELPER FUNCTIONS (Shared across commands)
# =============================================================================
def validate_auth(operation: str = "operation") -> Optional[Dict[str, Any]]:
    """Return an error payload when no HF token is configured, else None.

    Callers treat any non-None return as a terminal error response.
    """
    if TOKEN:
        return None
    return {
        "error": f"? Authentication required for {operation}",
        "help": "Set HF_TOKEN environment variable with your Hugging Face token",
        "instructions": {
            "get_token": "Visit https://huggingface.co/settings/tokens to create a token",
            "set_env": "Set HF_TOKEN=your_token_here in your environment",
            "restart": "Restart the MCP server after setting the token"
        }
    }
def validate_permissions(require_write: bool = False, require_admin: bool = False) -> Optional[Dict[str, Any]]:
    """Check the requested access level against the configured mode.

    Returns an error payload when write access is requested in read-only
    mode, or admin access without admin mode; otherwise None.
    """
    write_blocked = require_write and READ_ONLY
    if write_blocked:
        return {
            "error": "? Read-only mode - write operations not allowed",
            "help": "Set HF_READ_ONLY=false to enable write operations",
            "current_mode": "read-only",
            "required_mode": "read-write"
        }
    admin_blocked = require_admin and not ADMIN_MODE
    if admin_blocked:
        return {
            "error": "? Admin mode required for this operation",
            "help": "Set HF_ADMIN_MODE=true to enable admin operations",
            "current_mode": "standard",
            "required_mode": "admin",
            "warning": "Admin mode allows dangerous operations like repository deletion"
        }
    return None
def validate_repo_id(repo_id: str) -> Optional[Dict[str, Any]]:
    """Validate a Hub repository ID; return an error payload or None.

    Accepts 'name' or 'namespace/name', where each segment starts and ends
    with an alphanumeric character and may contain '.', '_' or '-' inside.
    """
    if not repo_id:
        return {"error": "? Repository ID cannot be empty"}
    if len(repo_id) > 96:  # Hub-imposed length limit
        return {"error": "? Repository ID too long (max 96 characters)"}
    segment = r'[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?'
    if re.fullmatch(f'{segment}(/{segment})?', repo_id) is None:
        return {
            "error": "? Invalid repository ID format",
            "help": "Repository ID should be 'username/repo-name' or just 'repo-name'",
            "examples": ["microsoft/DialoGPT-medium", "my-awesome-model", "datasets/squad"]
        }
    return None
def validate_file_path(filepath: str) -> Optional[Dict[str, Any]]:
    """Validate an in-repo file path; return an error payload or None.

    Rejects empty paths, traversal/unsafe character sequences, and paths
    longer than 255 characters.
    """
    if not filepath:
        return {"error": "? File path cannot be empty"}
    unsafe_tokens = ('..', '//', '\\', '<', '>', '|', '?', '*', '\0')
    offending = next((tok for tok in unsafe_tokens if tok in filepath), None)
    if offending is not None:
        return {
            "error": f"? File path contains unsafe characters: {offending}",
            "help": "Use only alphanumeric characters, hyphens, underscores, dots, and forward slashes"
        }
    if len(filepath) > 255:
        return {"error": "? File path too long (max 255 characters)"}
    return None
def safe_execute(func, operation_name: str, *args, **kwargs) -> Dict[str, Any]:
    """Run *func*, converting any exception into a structured error payload.

    Successful calls return func's result unchanged; failures are logged
    (with traceback) and reported as {"error": ..., "details": ...}.
    """
    try:
        debug_stderr(f"Executing {operation_name}")
        outcome = func(*args, **kwargs)
        debug_stderr(f"? {operation_name} completed successfully")
        return outcome
    except Exception as exc:
        message = f"? {operation_name} failed: {exc}"
        debug_stderr(message, "ERROR")
        logger.error(f"{operation_name} error: {exc}")
        logger.error(traceback.format_exc())
        return {"error": message, "details": str(exc)}
def format_repo_info(info) -> Dict[str, Any]:
    """Format a hub repo-info object into a plain JSON-serializable dict.

    Fix: hub objects sometimes carry ``tags``/``siblings``/``downloads``/
    ``likes`` set to None rather than absent; the original slicing
    (``info.siblings[:10]``, ``tags[:5]``) raised TypeError in that case.
    All optional attributes are now None-safe.

    Args:
        info: A ModelInfo/DatasetInfo-like object.

    Returns:
        Dict with id, author, timestamps (ISO strings or None), privacy,
        counts, up to 5 tags and up to 10 file names.
    """
    tags = getattr(info, 'tags', None) or []
    siblings = getattr(info, 'siblings', None) or []
    return {
        "id": info.id,
        "author": info.author,
        "created_at": info.created_at.isoformat() if info.created_at else None,
        "last_modified": info.last_modified.isoformat() if info.last_modified else None,
        "private": info.private,
        "downloads": getattr(info, 'downloads', 0) or 0,
        "likes": getattr(info, 'likes', 0) or 0,
        "tags": list(tags)[:5],  # limit for readability
        "files": [s.rfilename for s in siblings[:10]],
    }
def get_user_namespace() -> str:
    """Best-effort lookup of the authenticated user's namespace.

    Returns 'unknown' when no token is configured or the whoami call fails.
    """
    if not TOKEN:
        return 'unknown'
    try:
        return whoami(token=TOKEN).get('name', 'unknown')
    except Exception:
        return 'unknown'
def format_model_card_data(card_data: Dict[str, Any]) -> Dict[str, Any]:
    """Project the interesting model-card fields into a flat dict.

    List-valued fields default to []; scalar fields default to None.
    Note the source key for model_type is the hyphenated 'model-type'.
    """
    formatted: Dict[str, Any] = {}
    for scalar_key in ("license", "pipeline_tag", "library_name", "base_model"):
        formatted[scalar_key] = card_data.get(scalar_key)
    for list_key in ("language", "tags", "datasets", "metrics"):
        formatted[list_key] = card_data.get(list_key, [])
    formatted["model_type"] = card_data.get("model-type")
    return formatted
def calculate_popularity_score(repo_info) -> int:
    """Popularity metric for ranking: downloads weighted 2x, plus likes.

    Missing or None counts are treated as zero.
    """
    downloads = getattr(repo_info, 'downloads', None) or 0
    likes = getattr(repo_info, 'likes', None) or 0
    return 2 * downloads + likes
def validate_file_size(content: str, max_size: Optional[int] = None) -> Optional[Dict[str, Any]]:
    """Check the UTF-8 byte size of *content* against a limit.

    Fix: the parameter was annotated ``int = None``, which is an invalid
    type hint; it is now ``Optional[int]`` (behavior unchanged).

    Args:
        content: Text that would be uploaded.
        max_size: Byte limit; defaults to the configured MAX_FILE_SIZE.

    Returns:
        None when within the limit, otherwise an error payload carrying
        the measured and maximum sizes.
    """
    limit = MAX_FILE_SIZE if max_size is None else max_size
    size = len(content.encode('utf-8'))
    if size > limit:
        return {
            "error": f"? File size ({size:,} bytes) exceeds maximum ({limit:,} bytes)",
            "current_size": size,
            "max_size": limit,
        }
    return None
def sanitize_filename(filename: str) -> str:
    """Sanitize a filename for safe repository operations.

    Replaces reserved characters with '_', collapses runs of dots or
    underscores, strips leading/trailing dots and spaces, and falls back
    to 'unnamed_file' when nothing survives.
    """
    reserved = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})
    cleaned = filename.translate(reserved)
    cleaned = re.sub(r'[._]{2,}', '_', cleaned)
    cleaned = cleaned.strip('. ')
    return cleaned if cleaned else "unnamed_file"
def generate_commit_hash(content: str) -> str:
    """Return a short (12 hex chars) SHA-256 digest of *content*.

    Used as a lightweight content-tracking identifier, not for security.
    """
    digest = hashlib.sha256(content.encode('utf-8'))
    return digest.hexdigest()[:12]
def parse_repo_id(repo_id: str) -> Dict[str, str]:
    """Split 'author/name' (or a bare 'name') into its components.

    Only the first '/' separates author from name; any further slashes
    remain part of the name.
    """
    author, sep, name = repo_id.partition('/')
    if not sep:
        return {"author": "", "name": repo_id, "full_id": repo_id}
    return {"author": author, "name": name, "full_id": repo_id}
# =============================================================================
# CONSOLIDATED COMMANDS (Maximum 10 main commands)
# =============================================================================
@mcp.tool()
def hf_system_info() -> Dict[str, Any]:
    """Get HuggingMCP system information, configuration, and connectivity.

    Returns a static capability/configuration report; when a token is
    configured, also includes the authenticated user's name and email
    obtained via ``whoami`` (failures there are non-fatal).
    """
    def _get_system_info():
        # Best-effort user lookup -- a bad/expired token only downgrades
        # the user_info section, it does not fail the whole call.
        user_info = None
        if TOKEN:
            try:
                user_info = whoami(token=TOKEN)
            except Exception as e:
                debug_stderr(f"Failed to get user info: {e}", "WARN")
        return {
            "status": "? HuggingMCP is operational!",
            "version": "3.0.0",
            "server_info": {
                "authenticated": bool(TOKEN),
                "admin_mode": ADMIN_MODE,
                "read_only": READ_ONLY,
                "python_version": sys.version,
                "script_path": __file__
            },
            "user_info": {
                "authenticated": bool(TOKEN),
                "name": user_info.get('name', 'Unknown') if user_info else 'Not authenticated',
                "email": user_info.get('email', 'Unknown') if user_info else 'Not authenticated'
            },
            # Capability flags mirror the READ_ONLY/ADMIN_MODE/inference config.
            "capabilities": {
                "create_repos": not READ_ONLY,
                "delete_repos": ADMIN_MODE,
                "read_files": True,
                "write_files": not READ_ONLY,
                "search": True,
                "collections": not READ_ONLY,
                "pull_requests": not READ_ONLY,
                "model_evaluation": True,
                "dataset_processing": True,
                "license_management": True,
                "community_features": True,
                "space_management": not READ_ONLY,
                "inference_tools": ENABLE_INFERENCE,
                "ai_workflows": True,
                "advanced_analytics": True,
                "repository_utilities": True
            },
            "commands_available": 18,
            "new_features": [
                "?? Model evaluation and testing",
                "??? Advanced dataset processing",
                "?? License management tools",
                "?? Community interaction features",
                "?? Space management capabilities",
                "?? AI inference tools",
                "?? Workflow automation",
                "?? Advanced analytics engine",
                "??? Repository utilities"
            ],
            "debug_info": {
                "working_directory": os.getcwd(),
                "environment_vars": {
                    # Token value is deliberately not echoed, only presence.
                    "HF_TOKEN": "?" if TOKEN else "?",
                    "HF_READ_ONLY": str(READ_ONLY),
                    "HF_ADMIN_MODE": str(ADMIN_MODE),
                    "HF_ENABLE_INFERENCE": str(ENABLE_INFERENCE),
                    "HF_MAX_FILE_SIZE": f"{MAX_FILE_SIZE:,} bytes",
                    "HF_CACHE_ENABLED": str(CACHE_ENABLED)
                }
            }
        }
    return safe_execute(_get_system_info, "system_info")
@mcp.tool()
def hf_repository_manager(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """Comprehensive repository management: create, delete, info, list_files

    Actions:
    - create: Create new repository (kwargs: private, description, space_sdk,
      creator; creator defaults to the authenticated user)
    - delete: Delete repository (requires admin mode)
    - info: Get repository information (models and datasets only)
    - list_files: List all files in repository
    """
    def _manage_repository():
        # Validate repository ID for all actions
        repo_validation = validate_repo_id(repo_id)
        if repo_validation: return repo_validation
        if action == "create":
            auth_error = validate_auth("repository creation")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            private = kwargs.get("private", False)
            description = kwargs.get("description")
            space_sdk = kwargs.get("space_sdk")
            # Get creator - default to current authenticated user
            creator = kwargs.get("creator")
            if not creator and TOKEN:
                try:
                    user_info = whoami(token=TOKEN)
                    creator = user_info.get('name', 'unknown')
                except Exception:
                    creator = 'unknown'
            # Spaces require an SDK choice; reject unknown values up front.
            valid_sdks = ["gradio", "streamlit", "docker", "static"]
            if repo_type == "space":
                if not space_sdk or space_sdk not in valid_sdks:
                    return {
                        "error": "? space_sdk required for Spaces",
                        "valid_options": valid_sdks,
                        "help": {
                            "gradio": "Interactive ML demos with Python",
                            "streamlit": "Data apps and dashboards",
                            "docker": "Custom applications",
                            "static": "HTML/CSS/JS websites"
                        }
                    }
            # NOTE(review): `description` is collected above but never passed
            # to create_repo -- confirm whether it should be forwarded.
            create_params = {
                "repo_id": repo_id,
                "repo_type": repo_type,
                "private": private,
                "token": TOKEN
            }
            if repo_type == "space" and space_sdk:
                create_params["space_sdk"] = space_sdk
            repo_url = create_repo(**create_params)
            result = {
                "status": "success",
                "message": f"? Created {repo_type}: {repo_id}",
                "repo_url": repo_url,
                "repo_id": repo_id,
                "repo_type": repo_type,
                "private": private,
                "creator": creator or "unknown"
            }
            if repo_type == "space":
                result["space_sdk"] = space_sdk
                result["next_steps"] = {
                    "gradio": "Upload app.py + requirements.txt",
                    "streamlit": "Upload app.py + requirements.txt",
                    "docker": "Upload Dockerfile + app files",
                    "static": "Upload index.html + assets"
                }[space_sdk]
            return result
        elif action == "delete":
            # Destructive: gated behind both auth and admin mode.
            auth_error = validate_auth("repository deletion")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_admin=True)
            if perm_error: return perm_error
            delete_repo(repo_id=repo_id, repo_type=repo_type, token=TOKEN)
            return {
                "status": "success",
                "message": f"??? Deleted {repo_type}: {repo_id}",
                "repo_id": repo_id,
                "repo_type": repo_type
            }
        elif action == "info":
            if repo_type == "model":
                info = model_info(repo_id, token=TOKEN)
            elif repo_type == "dataset":
                info = dataset_info(repo_id, token=TOKEN)
            else:
                # Spaces have no info path here.
                return {"error": f"Unsupported repo_type for info: {repo_type}"}
            return format_repo_info(info)
        elif action == "list_files":
            files = api.list_repo_files(repo_id=repo_id, repo_type=repo_type, token=TOKEN)
            return {
                "repo_id": repo_id,
                "repo_type": repo_type,
                "file_count": len(files),
                "files": files
            }
        else:
            return {"error": f"? Invalid action: {action}. Use: create, delete, info, list_files"}
    return safe_execute(_manage_repository, f"repository_{action}")
@mcp.tool()
def hf_file_operations(
    action: str,
    repo_id: str,
    filename: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """Comprehensive file operations: read, write, edit, delete, validate, backup

    Actions:
    - read: Read file content (kwargs: revision, max_size, chunk_size, chunk_number)
    - write: Write/upload file content (kwargs: content, commit_message)
    - edit: Edit file by replacing old_text with new_text (first occurrence only)
    - delete: Delete file from repository
    - validate: Run basic format/content checks on a file
    - backup: Copy the file under backups/ with a timestamp suffix
    - batch_edit: Replace a pattern across files matching glob patterns

    NOTE(review): several user-facing f-strings below contain the literal
    text "(unknown)" where a {filename} placeholder was presumably intended
    -- confirm against the original source before changing.
    NOTE(review): edit/validate/backup/batch_edit call hf_file_operations()
    recursively through the @mcp.tool()-decorated name; this assumes the
    decorator returns the original callable -- confirm for the FastMCP
    version in use.
    """
    def _handle_file_operation():
        # Validate inputs for all file operations
        repo_validation = validate_repo_id(repo_id)
        if repo_validation: return repo_validation
        file_validation = validate_file_path(filename)
        if file_validation: return file_validation
        if action == "read":
            revision = kwargs.get("revision", "main")
            max_size = kwargs.get("max_size", 500000)
            chunk_size = kwargs.get("chunk_size")
            chunk_number = kwargs.get("chunk_number", 0)
            # Download file to the local HF cache, then read from disk.
            file_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                repo_type=repo_type,
                revision=revision,
                token=TOKEN
            )
            file_size = os.path.getsize(file_path)
            # Chunked reading: seek to chunk_number * chunk_size and return
            # one chunk plus paging metadata.
            if chunk_size:
                start_pos = chunk_number * chunk_size
                with open(file_path, 'r', encoding='utf-8') as f:
                    f.seek(start_pos)
                    content = f.read(chunk_size)
                total_chunks = (file_size + chunk_size - 1) // chunk_size
                return {
                    "repo_id": repo_id,
                    "filename": filename,
                    "content": content,
                    "chunk_number": chunk_number,
                    "chunk_size": len(content),
                    "total_chunks": total_chunks,
                    "file_size": file_size,
                    "has_more": chunk_number < total_chunks - 1,
                    "message": f"?? Read chunk {chunk_number + 1}/{total_chunks}"
                }
            # Regular reading with size limits; binary files surface as a
            # UnicodeDecodeError and are reported instead of raising.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    if max_size > 0 and file_size > max_size:
                        content = f.read(max_size)
                        return {
                            "repo_id": repo_id,
                            "filename": filename,
                            "content": content,
                            "size": len(content),
                            "full_file_size": file_size,
                            "truncated": True,
                            "message": f"?? Read (unknown) (truncated to {max_size:,} chars)",
                            "note": "Use max_size=0 for full file or increase max_size"
                        }
                    else:
                        content = f.read()
                        return {
                            "repo_id": repo_id,
                            "filename": filename,
                            "content": content,
                            "size": len(content),
                            "full_file_size": file_size,
                            "truncated": False,
                            "message": f"?? Successfully read (unknown) ({file_size:,} chars)"
                        }
            except UnicodeDecodeError:
                return {
                    "repo_id": repo_id,
                    "filename": filename,
                    "error": "Binary file - cannot display as text",
                    "size": file_size,
                    "is_binary": True
                }
        elif action == "write":
            auth_error = validate_auth("file writing")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            content = kwargs.get("content", "")
            commit_message = kwargs.get("commit_message", f"Upload (unknown)")
            # Normalize content: MCP clients may pass a list of text parts
            # or a non-string; everything is coerced to a single str.
            if isinstance(content, list):
                text_content = ""
                for item in content:
                    if isinstance(item, dict) and 'text' in item:
                        text_content += item['text']
                    elif isinstance(item, str):
                        text_content += item
                content = text_content
            elif not isinstance(content, str):
                content = str(content)
            commit_url = upload_file(
                path_or_fileobj=content.encode('utf-8'),
                path_in_repo=filename,
                repo_id=repo_id,
                repo_type=repo_type,
                token=TOKEN,
                commit_message=commit_message
            )
            return {
                "status": "success",
                "message": f"?? Successfully wrote (unknown)",
                "repo_id": repo_id,
                "filename": filename,
                "size": len(content.encode('utf-8')),
                "commit_url": commit_url,
                "commit_message": commit_message
            }
        elif action == "edit":
            auth_error = validate_auth("file editing")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            old_text = kwargs.get("old_text", "")
            new_text = kwargs.get("new_text", "")
            commit_message = kwargs.get("commit_message", f"Edit (unknown)")
            if not old_text:
                return {"error": "? old_text parameter required for editing"}
            # Read current content (recursive call via the read action).
            read_result = hf_file_operations("read", repo_id, filename, repo_type)
            if "error" in read_result:
                return read_result
            current_content = read_result["content"]
            # Check if old_text exists
            if old_text not in current_content:
                return {
                    "error": f"? Text not found in (unknown)",
                    "searched_for": old_text[:100] + "..." if len(old_text) > 100 else old_text,
                    "file_preview": current_content[:200] + "..." if len(current_content) > 200 else current_content
                }
            # Replace only the first occurrence.
            new_content = current_content.replace(old_text, new_text, 1)
            # Write updated content
            write_result = hf_file_operations("write", repo_id, filename, repo_type,
                                              content=new_content, commit_message=commit_message)
            if "error" in write_result:
                return write_result
            return {
                "status": "success",
                "message": f"?? Successfully edited (unknown)",
                "repo_id": repo_id,
                "filename": filename,
                "old_text_length": len(old_text),
                "new_text_length": len(new_text),
                "total_size": len(new_content),
                "commit_url": write_result["commit_url"]
            }
        elif action == "delete":
            auth_error = validate_auth("file deletion")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            commit_message = kwargs.get("commit_message", f"Delete (unknown)")
            delete_file(
                path_in_repo=filename,
                repo_id=repo_id,
                repo_type=repo_type,
                token=TOKEN,
                commit_message=commit_message
            )
            return {
                "status": "success",
                "message": f"??? Successfully deleted (unknown)",
                "repo_id": repo_id,
                "filename": filename,
                "commit_message": commit_message
            }
        elif action == "validate":
            try:
                # Read (up to 100KB of) the file first.
                read_result = hf_file_operations("read", repo_id, filename, repo_type, max_size=100000)
                if "error" in read_result:
                    return read_result
                content = read_result["content"]
                file_ext = Path(filename).suffix.lower()
                validation_results = {
                    "repo_id": repo_id,
                    "filename": filename,
                    "file_extension": file_ext,
                    "file_size": len(content.encode('utf-8')),
                    "validation_timestamp": datetime.now().isoformat(),
                    "checks": {}
                }
                # Basic checks applied to every file type.
                validation_results["checks"]["valid_utf8"] = not read_result.get("is_binary", False)
                validation_results["checks"]["reasonable_size"] = validation_results["file_size"] < MAX_FILE_SIZE
                validation_results["checks"]["safe_filename"] = sanitize_filename(filename) == filename
                # Format-specific heuristics by extension.
                if file_ext == ".json":
                    try:
                        json.loads(content)
                        validation_results["checks"]["valid_json"] = True
                    except json.JSONDecodeError:
                        validation_results["checks"]["valid_json"] = False
                elif file_ext == ".md":
                    validation_results["checks"]["has_headers"] = bool(re.search(r'^#', content, re.MULTILINE))
                    validation_results["checks"]["reasonable_length"] = 100 <= len(content) <= 50000
                elif file_ext in [".py", ".js", ".ts"]:
                    # Heuristic text scan only -- not a real syntax check.
                    validation_results["checks"]["no_syntax_errors"] = not bool(re.search(r'SyntaxError|TypeError', content))
                    validation_results["checks"]["has_functions"] = bool(re.search(r'def |function |const ', content))
                # Score = percentage of passed checks; >= 80% counts as valid.
                passed_checks = sum(1 for v in validation_results["checks"].values() if v)
                total_checks = len(validation_results["checks"])
                validation_results["validation_score"] = (passed_checks / total_checks) * 100 if total_checks > 0 else 100
                validation_results["validation_status"] = "? Valid" if validation_results["validation_score"] >= 80 else "?? Issues found"
                return validation_results
            except Exception as e:
                return {"error": f"? File validation failed: {str(e)}"}
        elif action == "backup":
            try:
                # Read current file content
                read_result = hf_file_operations("read", repo_id, filename, repo_type)
                if "error" in read_result:
                    return read_result
                # Create timestamped backup filename under backups/.
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                backup_filename = f"backups/(unknown).backup_{timestamp}"
                # Write backup file (if write permissions available)
                if not READ_ONLY:
                    backup_result = hf_file_operations("write", repo_id, backup_filename, repo_type,
                                                       content=read_result["content"],
                                                       commit_message=f"Backup of (unknown)")
                    return {
                        "status": "success",
                        "message": f"?? Backup created: {backup_filename}",
                        "original_file": filename,
                        "backup_file": backup_filename,
                        "backup_size": read_result.get("size", 0),
                        "backup_timestamp": timestamp,
                        "commit_url": backup_result.get("commit_url")
                    }
                else:
                    # Read-only mode: return the content instead of writing it.
                    return {
                        "status": "info",
                        "message": "?? Backup content prepared (read-only mode)",
                        "original_file": filename,
                        "backup_content": read_result["content"],
                        "backup_size": read_result.get("size", 0),
                        "backup_timestamp": timestamp
                    }
            except Exception as e:
                return {"error": f"? File backup failed: {str(e)}"}
        elif action == "batch_edit":
            auth_error = validate_auth("batch file editing")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            pattern = kwargs.get("pattern", "")
            replacement = kwargs.get("replacement", "")
            file_patterns = kwargs.get("file_patterns", ["*.md", "*.txt"])
            if not pattern:
                return {"error": "? pattern parameter required for batch editing"}
            try:
                # Expand glob-style file_patterns against the repo file list.
                files = api.list_repo_files(repo_id=repo_id, repo_type=repo_type, token=TOKEN)
                matching_files = []
                for file_pattern in file_patterns:
                    # NOTE(review): import could be hoisted out of the loop.
                    import fnmatch
                    matching_files.extend([f for f in files if fnmatch.fnmatch(f, file_pattern)])
                # Remove duplicates
                matching_files = list(set(matching_files))
                edit_results = []
                successful_edits = 0
                for file_path in matching_files[:20]:  # Limit to 20 files
                    try:
                        # Read file
                        read_result = hf_file_operations("read", repo_id, file_path, repo_type)
                        if "error" in read_result:
                            edit_results.append({
                                "file": file_path,
                                "status": "failed",
                                "error": "Could not read file"
                            })
                            continue
                        content = read_result["content"]
                        # Skip files that don't contain the pattern at all.
                        if pattern not in content:
                            edit_results.append({
                                "file": file_path,
                                "status": "skipped",
                                "reason": "Pattern not found"
                            })
                            continue
                        # Apply replacement (all occurrences in this file).
                        new_content = content.replace(pattern, replacement)
                        # Write back
                        write_result = hf_file_operations("write", repo_id, file_path, repo_type,
                                                          content=new_content,
                                                          commit_message=f"Batch edit: replace '{pattern[:50]}...' in {file_path}")
                        if "error" not in write_result:
                            edit_results.append({
                                "file": file_path,
                                "status": "success",
                                "changes": content.count(pattern)
                            })
                            successful_edits += 1
                        else:
                            edit_results.append({
                                "file": file_path,
                                "status": "failed",
                                "error": write_result["error"]
                            })
                    except Exception as e:
                        edit_results.append({
                            "file": file_path,
                            "status": "failed",
                            "error": str(e)
                        })
                return {
                    "status": "completed",
                    "message": f"?? Batch edit completed: {successful_edits}/{len(matching_files)} files updated",
                    "pattern": pattern,
                    "replacement": replacement,
                    "total_files": len(matching_files),
                    "successful_edits": successful_edits,
                    "results": edit_results,
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Batch edit failed: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: read, write, edit, delete, validate, backup, batch_edit"}
    return safe_execute(_handle_file_operation, f"file_{action}")
@mcp.tool()
def hf_search_hub(
    content_type: str,
    query: Optional[str] = None,
    author: Optional[str] = None,
    filter_tag: Optional[str] = None,
    limit: int = 20
) -> Dict[str, Any]:
    """Search Hugging Face Hub for models, datasets, or spaces

    Args:
        content_type: Type to search ("models", "datasets", "spaces")
        query: Search query string
        author: Filter by author username
        filter_tag: Filter by tag (models and datasets only)
        limit: Maximum results to return

    Returns:
        Dict echoing the query parameters plus a list of result summaries.
    """
    def _search_hub():
        results = []
        if content_type == "models":
            # Models are the only type sorted server-side (by downloads, desc).
            items = list_models(
                search=query,
                author=author,
                filter=filter_tag,
                sort="downloads",
                direction=-1,
                limit=limit,
                token=TOKEN
            )
            for item in items:
                results.append({
                    "id": item.id,
                    "author": item.author,
                    "downloads": item.downloads,
                    "likes": item.likes,
                    "tags": item.tags[:5],
                    "created_at": item.created_at.isoformat() if item.created_at else None
                })
        elif content_type == "datasets":
            items = list_datasets(
                search=query,
                author=author,
                filter=filter_tag,
                limit=limit,
                token=TOKEN
            )
            for item in items:
                results.append({
                    "id": item.id,
                    "author": item.author,
                    "downloads": item.downloads,
                    "likes": item.likes,
                    "tags": item.tags[:5],
                    "created_at": item.created_at.isoformat() if item.created_at else None
                })
        elif content_type == "spaces":
            # Spaces have no download counts; SDK is reported when available.
            items = list_spaces(
                search=query,
                author=author,
                limit=limit,
                token=TOKEN
            )
            for item in items:
                results.append({
                    "id": item.id,
                    "author": item.author,
                    "likes": item.likes,
                    "tags": item.tags[:5],
                    "sdk": getattr(item, 'sdk', None),
                    "created_at": item.created_at.isoformat() if item.created_at else None
                })
        else:
            return {"error": f"? Invalid content_type: {content_type}. Use: models, datasets, spaces"}
        return {
            "content_type": content_type,
            "query": query,
            "author": author,
            "filter_tag": filter_tag,
            "total_results": len(results),
            "results": results
        }
    return safe_execute(_search_hub, f"search_{content_type}")
@mcp.tool()
def hf_collections(
    action: str,
    **kwargs
) -> Dict[str, Any]:
    """Manage Hugging Face Collections: create, add_item, info

    Actions:
    - create: Create new collection (kwargs: title [required], namespace,
      description, private; namespace defaults to the authenticated user)
    - add_item: Add item to collection (kwargs: collection_slug, item_id,
      item_type [all required], note)
    - info: Get collection information (kwargs: collection_slug [required])
    """
    def _manage_collections():
        if action == "create":
            auth_error = validate_auth("collection creation")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            title = kwargs.get("title")
            if not title:
                return {"error": "? title parameter required for collection creation"}
            namespace = kwargs.get("namespace") or get_user_namespace()
            description = kwargs.get("description")
            private = kwargs.get("private", False)
            collection = create_collection(
                title=title,
                namespace=namespace,
                description=description,
                private=private,
                token=TOKEN
            )
            return {
                "status": "success",
                "message": f"?? Created collection: {title}",
                "collection_slug": collection.slug,
                "title": collection.title,
                # getattr fallbacks cover huggingface_hub versions whose
                # Collection object lacks these attributes.
                "owner": getattr(collection, 'owner', namespace),
                "url": getattr(collection, 'url', f"https://huggingface.co/collections/{namespace}/{collection.slug}")
            }
        elif action == "add_item":
            auth_error = validate_auth("collection modification")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            collection_slug = kwargs.get("collection_slug")
            item_id = kwargs.get("item_id")
            item_type = kwargs.get("item_type")
            note = kwargs.get("note")
            if not all([collection_slug, item_id, item_type]):
                return {"error": "? collection_slug, item_id, and item_type required"}
            add_collection_item(
                collection_slug=collection_slug,
                item_id=item_id,
                item_type=item_type,
                note=note,
                token=TOKEN
            )
            return {
                "status": "success",
                "message": f"? Added {item_id} to collection",
                "collection_slug": collection_slug,
                "item_id": item_id,
                "item_type": item_type
            }
        elif action == "info":
            collection_slug = kwargs.get("collection_slug")
            if not collection_slug:
                return {"error": "? collection_slug parameter required"}
            collection = get_collection(collection_slug, token=TOKEN)
            items = []
            for item in collection.items:
                items.append({
                    "item_id": item.item_id,
                    "item_type": item.item_type,
                    "position": item.position,
                    "note": getattr(item, 'note', None)
                })
            return {
                "slug": collection.slug,
                "title": collection.title,
                "description": collection.description,
                "owner": collection.owner,
                "items": items,
                "item_count": len(items),
                "url": collection.url
            }
        else:
            return {"error": f"? Invalid action: {action}. Use: create, add_item, info"}
    return safe_execute(_manage_collections, f"collections_{action}")
@mcp.tool()
def hf_pull_requests(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """Manage Pull Requests: create, list, details, create_with_files
    Actions:
    - create: Create empty PR (title, description)
    - list: List PRs (status, author)
    - details: Get PR details (pr_number)
    - create_with_files: Create PR with file changes (files, commit_message, pr_title, pr_description)
    """
    def _manage_pull_requests() -> Dict[str, Any]:
        # Dispatch on `action`; every branch returns a plain dict so the MCP
        # layer can serialize it directly. Exceptions are handled by
        # safe_execute() at the bottom.
        if action == "create":
            # PR creation writes to the Hub: require an authenticated token.
            auth_error = validate_auth("pull request creation")
            if auth_error: return auth_error
            title = kwargs.get("title")
            if not title or len(title.strip()) < 3:
                return {"error": "? title required (min 3 characters)"}
            description = kwargs.get("description", "Pull Request created with HuggingMCP")
            # On the HF Hub a pull request is a discussion created with
            # pull_request=True.
            discussion = create_discussion(
                repo_id=repo_id,
                title=title.strip(),
                description=description,
                repo_type=repo_type,
                pull_request=True,
                token=TOKEN
            )
            return {
                "status": "success",
                "message": f"?? Created PR: {title}",
                "pr_number": discussion.num,
                "pr_title": discussion.title,
                "pr_url": discussion.url,
                "repo_id": repo_id,
                "repo_type": repo_type
            }
        elif action == "list":
            # Read-only: no auth check, works anonymously on public repos.
            status = kwargs.get("status", "open")
            author = kwargs.get("author")
            # "all" means no status filter at the API level.
            discussion_status = None if status == "all" else status
            discussions = get_repo_discussions(
                repo_id=repo_id,
                repo_type=repo_type,
                discussion_type="pull_request",
                discussion_status=discussion_status,
                author=author,
                token=TOKEN
            )
            prs = []
            for discussion in discussions:
                prs.append({
                    "number": discussion.num,
                    "title": discussion.title,
                    "author": discussion.author,
                    "status": discussion.status,
                    "created_at": discussion.created_at.isoformat() if discussion.created_at else None,
                    "url": discussion.url,
                    "is_pull_request": discussion.is_pull_request
                })
            return {
                "repo_id": repo_id,
                "repo_type": repo_type,
                "status_filter": status,
                "author_filter": author,
                "total_prs": len(prs),
                # Payload capped at 20 PRs; total_prs still reflects all found.
                "pull_requests": prs[:20]
            }
        elif action == "details":
            pr_number = kwargs.get("pr_number")
            if pr_number is None:
                return {"error": "? pr_number parameter required"}
            discussion = get_discussion_details(
                repo_id=repo_id,
                discussion_num=pr_number,
                repo_type=repo_type,
                token=TOKEN
            )
            # Flatten the event stream (comments, status changes, commits...)
            # into plain dicts.
            events = []
            for event in discussion.events:
                events.append({
                    "type": event.type,
                    "author": event.author,
                    "created_at": event.created_at.isoformat() if event.created_at else None,
                    # Not every event type carries text content.
                    "content": getattr(event, 'content', '') if hasattr(event, 'content') else ''
                })
            return {
                "repo_id": repo_id,
                "pr_number": discussion.num,
                "title": discussion.title,
                "author": discussion.author,
                "status": discussion.status,
                "created_at": discussion.created_at.isoformat() if discussion.created_at else None,
                "is_pull_request": discussion.is_pull_request,
                "url": discussion.url,
                "conflicting_files": discussion.conflicting_files,
                "target_branch": discussion.target_branch,
                "merge_commit_oid": discussion.merge_commit_oid,
                "events": events,
                "total_events": len(events)
            }
        elif action == "create_with_files":
            auth_error = validate_auth("pull request with files creation")
            if auth_error: return auth_error
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            files = kwargs.get("files", [])
            commit_message = kwargs.get("commit_message")
            pr_title = kwargs.get("pr_title")
            pr_description = kwargs.get("pr_description")
            if not files:
                return {"error": "? files parameter required (list of {path, content} dicts)"}
            if not commit_message:
                return {"error": "? commit_message parameter required"}
            # Create commit operations — one CommitOperationAdd per file so
            # the whole change lands in a single commit.
            operations = []
            for file_op in files:
                if not isinstance(file_op, dict) or "path" not in file_op or "content" not in file_op:
                    return {"error": "? Invalid file format. Use: [{\"path\": \"file.txt\", \"content\": \"content\"}]"}
                operations.append(
                    CommitOperationAdd(
                        path_in_repo=file_op["path"],
                        path_or_fileobj=file_op["content"].encode('utf-8')
                    )
                )
            # Create commit with PR.
            # NOTE(review): HfApi.create_commit in current huggingface_hub
            # versions accepts create_pr but not pr_title/pr_description —
            # verify against the pinned hub version; an unexpected-kwarg
            # TypeError here would surface via safe_execute.
            commit_result = api.create_commit(
                repo_id=repo_id,
                operations=operations,
                commit_message=commit_message,
                repo_type=repo_type,
                create_pr=True,
                pr_title=pr_title or f"PR: {commit_message}",
                pr_description=pr_description or f"Automated PR via HuggingMCP\n\nCommit: {commit_message}",
                token=TOKEN
            )
            return {
                "status": "success",
                "message": f"?? Created commit and PR: {commit_message}",
                "commit_url": commit_result.commit_url,
                # pr_url only exists on the result when a PR was created.
                "pr_url": getattr(commit_result, 'pr_url', None),
                "commit_sha": commit_result.oid,
                "repo_id": repo_id,
                "repo_type": repo_type,
                "files_changed": len(operations)
            }
        else:
            return {"error": f"? Invalid action: {action}. Use: create, list, details, create_with_files"}
    return safe_execute(_manage_pull_requests, f"pr_{action}")
@mcp.tool()
def hf_upload_manager(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """Upload management: single_file, multiple_files, with_pr
    Actions:
    - single_file: Upload one file (file_path, content, commit_message)
    - multiple_files: Upload multiple files (files list, commit_message)
    - with_pr: Upload file(s) and create PR (file_path, content, commit_message, pr_title, pr_description)
    """
    def _manage_uploads() -> Dict[str, Any]:
        # Every upload action writes to the Hub: check auth + write
        # permission once, up front, before dispatching on `action`.
        auth_error = validate_auth("file upload")
        if auth_error: return auth_error
        perm_error = validate_permissions(require_write=True)
        if perm_error: return perm_error
        if action == "single_file":
            file_path = kwargs.get("file_path")
            content = kwargs.get("content")
            commit_message = kwargs.get("commit_message", f"Upload {file_path}")
            # `content is None` (not plain falsiness) so an empty file
            # ("" content) is still a valid upload.
            if not file_path or content is None:
                return {"error": "? file_path and content parameters required"}
            # Content is treated as text and encoded as UTF-8 bytes.
            commit_url = upload_file(
                path_or_fileobj=content.encode('utf-8'),
                path_in_repo=file_path,
                repo_id=repo_id,
                repo_type=repo_type,
                token=TOKEN,
                commit_message=commit_message
            )
            return {
                "status": "success",
                "message": f"?? Uploaded {file_path}",
                "file_path": file_path,
                "file_size": len(content.encode('utf-8')),
                "commit_url": commit_url,
                "commit_message": commit_message
            }
        elif action == "multiple_files":
            files = kwargs.get("files", [])
            commit_message = kwargs.get("commit_message", "Upload multiple files")
            if not files:
                return {"error": "? files parameter required (list of {path, content} dicts)"}
            # Build a single commit containing every file so the upload is
            # all-or-nothing; any malformed entry aborts before committing.
            operations = []
            for file_op in files:
                if not isinstance(file_op, dict) or "path" not in file_op or "content" not in file_op:
                    return {"error": "? Invalid file format. Use: [{\"path\": \"file.txt\", \"content\": \"content\"}]"}
                operations.append(
                    CommitOperationAdd(
                        path_in_repo=file_op["path"],
                        path_or_fileobj=file_op["content"].encode('utf-8')
                    )
                )
            commit_result = api.create_commit(
                repo_id=repo_id,
                operations=operations,
                commit_message=commit_message,
                repo_type=repo_type,
                token=TOKEN
            )
            return {
                "status": "success",
                "message": f"?? Uploaded {len(operations)} files",
                "files_uploaded": len(operations),
                "commit_url": commit_result.commit_url,
                "commit_sha": commit_result.oid,
                "commit_message": commit_message
            }
        elif action == "with_pr":
            file_path = kwargs.get("file_path")
            content = kwargs.get("content")
            commit_message = kwargs.get("commit_message", f"Upload {file_path}")
            pr_title = kwargs.get("pr_title", f"Add {file_path}")
            pr_description = kwargs.get("pr_description", f"Uploaded {file_path} via HuggingMCP")
            if not file_path or content is None:
                return {"error": "? file_path and content parameters required"}
            # create_pr=True uploads onto a new branch and opens a PR instead
            # of committing to the default branch.
            result = upload_file(
                path_or_fileobj=content.encode('utf-8'),
                path_in_repo=file_path,
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=commit_message,
                create_pr=True,
                pr_title=pr_title,
                pr_description=pr_description,
                token=TOKEN
            )
            return {
                "status": "success",
                "message": f"?? Uploaded {file_path} and created PR",
                "file_path": file_path,
                "file_size": len(content.encode('utf-8')),
                "commit_message": commit_message,
                # NOTE(review): upload_file's return type varies across
                # huggingface_hub versions (str URL vs CommitInfo), so newer
                # versions yield pr_url=None here — confirm against the
                # pinned hub version.
                "pr_url": result if isinstance(result, str) else None,
                "repo_id": repo_id,
                "repo_type": repo_type
            }
        else:
            return {"error": f"? Invalid action: {action}. Use: single_file, multiple_files, with_pr"}
    return safe_execute(_manage_uploads, f"upload_{action}")
@mcp.tool()
def hf_repo_file_manager(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    filename: Optional[str] = None,
    **kwargs
) -> Dict[str, Any]:
    """Unified repository and file management with rename support

    Actions are namespaced by prefix:
    - repo_<action>: delegated to hf_repository_manager (e.g. repo_info)
    - file_<action>: delegated to hf_file_operations, except file_rename
      which is handled here (requires: filename, new_filename; optional:
      commit_message)
    """
    def _repo_file() -> Dict[str, Any]:
        if action.startswith("repo_"):
            # Strip only the leading prefix; str.replace() would also mangle
            # later occurrences of "repo_" inside the action name.
            return hf_repository_manager(action[len("repo_"):], repo_id, repo_type, **kwargs)
        if action.startswith("file_"):
            file_action = action[len("file_"):]
            if file_action == "rename":
                auth_error = validate_auth("file rename")
                if auth_error:
                    return auth_error
                perm_error = validate_permissions(require_write=True)
                if perm_error:
                    return perm_error
                new_filename = kwargs.get("new_filename")
                if not filename or not new_filename:
                    return {"error": "? filename and new_filename required"}
                # The Hub has no atomic rename: download the file, then
                # re-add it under the new path and delete the old path in a
                # single commit.
                file_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type=repo_type, token=TOKEN)
                with open(file_path, "rb") as f:
                    content = f.read()
                operations = [
                    CommitOperationAdd(path_in_repo=new_filename, path_or_fileobj=content),
                    CommitOperationDelete(path_in_repo=filename),
                ]
                commit_result = api.create_commit(
                    repo_id=repo_id,
                    operations=operations,
                    # Bug fix: the default message previously read
                    # "Rename (unknown) to ..." — a leftover placeholder —
                    # instead of naming the actual source file.
                    commit_message=kwargs.get("commit_message", f"Rename {filename} to {new_filename}"),
                    repo_type=repo_type,
                    token=TOKEN,
                )
                return {
                    "status": "success",
                    # Same placeholder fix as the commit message above.
                    "message": f"?? Renamed {filename} to {new_filename}",
                    "commit_url": commit_result.commit_url,
                    "repo_id": repo_id,
                    "old_filename": filename,
                    "new_filename": new_filename,
                }
            # Every other file_* action is delegated unchanged.
            return hf_file_operations(file_action, repo_id, filename, repo_type, **kwargs)
        return {"error": f"? Unknown action: {action}"}
    return safe_execute(_repo_file, f"repo_file_{action}")
@mcp.tool()
def hf_batch_operations(
    operation_type: str,
    operations: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Execute multiple operations in batch with enhanced error handling
    Args:
        operation_type: Type of batch operation ("search", "info", "files")
        operations: List of operation dictionaries with parameters
    """
    def _execute_batch() -> Dict[str, Any]:
        # Reject an empty batch outright — nothing to do.
        if not operations:
            return {"error": "? operations list cannot be empty"}
        succeeded = []   # per-operation results, in submission order
        failures = []    # human-readable error strings
        for index, params in enumerate(operations):
            try:
                if operation_type == "search":
                    outcome = hf_search_hub(
                        content_type=params.get("content_type", "models"),
                        query=params.get("query"),
                        author=params.get("author"),
                        filter_tag=params.get("filter_tag"),
                        limit=params.get("limit", 10)
                    )
                elif operation_type in ("info", "files"):
                    # Both map onto the repository manager; only the
                    # delegated action differs.
                    outcome = hf_repository_manager(
                        action="info" if operation_type == "info" else "list_files",
                        repo_id=params.get("repo_id"),
                        repo_type=params.get("repo_type", "model")
                    )
                else:
                    failures.append(f"Operation {index}: Invalid operation_type: {operation_type}")
                    continue
                succeeded.append({
                    "operation_index": index,
                    "operation": params,
                    "result": outcome
                })
            except Exception as exc:
                # One failing operation never aborts the rest of the batch.
                failures.append(f"Operation {index}: {str(exc)}")
                debug_stderr(f"Batch operation {index} failed: {exc}", "ERROR")
        return {
            "operation_type": operation_type,
            "total_operations": len(operations),
            "successful_operations": len(succeeded),
            "failed_operations": len(failures),
            "results": succeeded,
            "errors": failures if failures else None,
            "summary": f"? {len(succeeded)} successful, ? {len(failures)} failed"
        }
    return safe_execute(_execute_batch, f"batch_{operation_type}")
@mcp.tool()
def hf_advanced_search(
    query: str,
    search_types: Optional[List[str]] = None,
    filters: Optional[Dict[str, Any]] = None,
    limit_per_type: int = 10
) -> Dict[str, Any]:
    """Advanced search across multiple content types with filtering
    Args:
        query: Search query string
        search_types: Content types to search; defaults to
            ["models", "datasets", "spaces"] when omitted
        filters: Optional filters dict {author, tag}
        limit_per_type: Maximum results per content type
    """
    def _advanced_search() -> Dict[str, Any]:
        # Resolve defaults locally. The previous version used a mutable
        # default argument for search_types (a single list shared across
        # calls AND returned to callers below) and rebound the `filters`
        # parameter via `nonlocal` — both replaced with locals here.
        types_to_search = search_types if search_types is not None else ["models", "datasets", "spaces"]
        active_filters = filters or {}
        author = active_filters.get("author")
        filter_tag = active_filters.get("tag")
        all_results = {}
        total_found = 0
        for content_type in types_to_search:
            # Unknown content types are silently skipped (no entry in the
            # result), matching the previous behavior.
            if content_type not in ("models", "datasets", "spaces"):
                continue
            search_result = hf_search_hub(
                content_type=content_type,
                query=query,
                author=author,
                filter_tag=filter_tag,
                limit=limit_per_type
            )
            if "error" not in search_result:
                all_results[content_type] = search_result["results"]
                total_found += len(search_result["results"])
            else:
                # A failed sub-search contributes an empty bucket rather
                # than failing the whole request.
                all_results[content_type] = []
        # Flatten all hits and rank them by a simple popularity heuristic.
        combined_results = []
        for content_type, results in all_results.items():
            for result in results:
                result["content_type"] = content_type
                # downloads/likes may be absent OR present-but-None; the old
                # `.get(key, 0)` crashed on None, so coerce with `or 0`.
                popularity = (result.get("downloads") or 0) + (result.get("likes") or 0)
                result["popularity_score"] = popularity
                combined_results.append(result)
        # Most popular first.
        combined_results.sort(key=lambda x: x["popularity_score"], reverse=True)
        return {
            "query": query,
            "search_types": types_to_search,
            "filters": active_filters,
            "total_results": total_found,
            "results_by_type": all_results,
            "combined_results": combined_results[:50],  # Top 50 overall
            "summary": {
                "models": len(all_results.get("models", [])),
                "datasets": len(all_results.get("datasets", [])),
                "spaces": len(all_results.get("spaces", []))
            }
        }
    return safe_execute(_advanced_search, "advanced_search")
@mcp.tool()
def hf_debug_diagnostics() -> Dict[str, Any]:
    """Comprehensive debugging and diagnostic information for troubleshooting

    Collects environment, configuration, dependency, memory, Hub
    connectivity and log-file information into a single dict. Individual
    probes degrade to explanatory entries instead of raising.
    """
    def _get_diagnostics() -> Dict[str, Any]:
        diagnostics = {
            "server_status": "?? Running",
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()),
            "environment": {
                "python_version": sys.version,
                "python_executable": sys.executable,
                "script_path": __file__,
                "working_directory": os.getcwd(),
                "platform": sys.platform
            },
            "configuration": {
                "hf_token_present": bool(TOKEN),
                "read_only_mode": READ_ONLY,
                "admin_mode": ADMIN_MODE,
                # Only the length is exposed — never the token itself.
                "token_length": len(TOKEN) if TOKEN else 0
            },
            "dependencies": {},
            "file_system": {
                "script_exists": os.path.exists(__file__),
                "script_readable": os.access(__file__, os.R_OK),
                "script_size": os.path.getsize(__file__) if os.path.exists(__file__) else 0
            },
            "memory_usage": {},
            "recent_operations": [],
            # Schema-consistency fix: this was previously initialized as a
            # list and replaced with a dict only when the log file existed,
            # so the field's type depended on runtime state. Now always a
            # dict with at least log_file_exists.
            "debug_logs": {"log_file_exists": False}
        }
        # Check dependencies (import errors reported, not raised).
        try:
            import mcp
            diagnostics["dependencies"]["mcp"] = f"? {mcp.__version__ if hasattr(mcp, '__version__') else 'installed'}"
        except ImportError as e:
            diagnostics["dependencies"]["mcp"] = f"? Missing: {e}"
        try:
            import huggingface_hub
            diagnostics["dependencies"]["huggingface_hub"] = f"? {huggingface_hub.__version__}"
        except ImportError as e:
            diagnostics["dependencies"]["huggingface_hub"] = f"? Missing: {e}"
        # Memory usage — psutil is an optional dependency.
        try:
            import psutil
            process = psutil.Process()
            diagnostics["memory_usage"] = {
                "memory_percent": process.memory_percent(),
                "memory_info": process.memory_info()._asdict()
            }
        except ImportError:
            diagnostics["memory_usage"] = {"note": "psutil not available for detailed memory info"}
        # Basic Hub connectivity probe (authenticated when a token is set,
        # otherwise a one-model anonymous listing as a smoke test).
        try:
            if TOKEN:
                user_info = whoami(token=TOKEN)
                diagnostics["hf_connectivity"] = {
                    "status": "? Connected",
                    "user": user_info.get('name', 'Unknown'),
                    "authenticated": True
                }
            else:
                models = list(list_models(limit=1))
                diagnostics["hf_connectivity"] = {
                    "status": "? Connected (anonymous)",
                    "authenticated": False,
                    "test_successful": len(models) > 0
                }
        except Exception as e:
            diagnostics["hf_connectivity"] = {
                "status": "? Connection failed",
                "error": str(e)
            }
        # Debug log file metadata, when the log file is present.
        log_file = '/tmp/hugmcp_debug.log'
        if os.path.exists(log_file):
            diagnostics["debug_logs"] = {
                "log_file_exists": True,
                "log_file_size": os.path.getsize(log_file),
                "last_modified": time.ctime(os.path.getmtime(log_file))
            }
        return diagnostics
    return safe_execute(_get_diagnostics, "diagnostics")
@mcp.tool()
def hf_model_evaluation(
    action: str,
    repo_id: str,
    **kwargs
) -> Dict[str, Any]:
    """Advanced model evaluation and testing capabilities
    Actions:
    - analyze: Analyze model architecture, performance metrics, and compatibility
    - compare: Compare multiple models side by side (kwargs: models)
    - test_inference: Test model inference capabilities (kwargs: test_input)
    - validate_model: Validate model integrity and format
    """
    def _evaluate_model() -> Dict[str, Any]:
        if action == "analyze":
            try:
                # files_metadata=True populates per-file size info on siblings.
                model_info_result = model_info(repo_id, token=TOKEN, files_metadata=True)
                card_data = getattr(model_info_result, 'cardData', {}) or {}
                # Normalize the sibling list ONCE. The previous per-item
                # guard `for s in X.siblings if hasattr(X, 'siblings')`
                # accessed the attribute before the guard ran, so a missing
                # `siblings` raised AttributeError instead of defaulting.
                siblings = list(getattr(model_info_result, 'siblings', None) or [])
                file_names = [s.rfilename for s in siblings]
                # Group files by extension with per-extension size totals.
                file_analysis = {}
                for sibling in siblings:
                    file_ext = Path(sibling.rfilename).suffix.lower()
                    file_size = getattr(sibling, 'size', 0) or 0
                    entry = file_analysis.setdefault(file_ext, {"count": 0, "total_size": 0, "files": []})
                    entry["count"] += 1
                    entry["total_size"] += file_size
                    entry["files"].append({
                        "name": sibling.rfilename,
                        "size": file_size
                    })
                # Heuristic framework detection from file names (replaces the
                # old substring search over str(list-of-names)).
                frameworks = []
                if any(f.endswith(('.bin', '.safetensors')) for f in file_names):
                    if any('pytorch_model' in f for f in file_names):
                        frameworks.append("PyTorch")
                    if any('model.safetensors' in f for f in file_names):
                        frameworks.append("SafeTensors")
                if any('tf_model' in f for f in file_names):
                    frameworks.append("TensorFlow")
                if any('.onnx' in f for f in file_names):
                    frameworks.append("ONNX")
                return {
                    "repo_id": repo_id,
                    "analysis_timestamp": datetime.now().isoformat(),
                    "basic_info": format_repo_info(model_info_result),
                    "model_card": format_model_card_data(card_data),
                    "file_analysis": file_analysis,
                    "detected_frameworks": frameworks,
                    "model_size_estimate": sum(getattr(s, 'size', 0) or 0 for s in siblings),
                    "popularity_score": calculate_popularity_score(model_info_result),
                    "compatibility_info": {
                        "has_config": any('config.json' in f for f in file_names),
                        "has_tokenizer": any('tokenizer' in f for f in file_names),
                        "has_readme": any('README.md' in f for f in file_names)
                    }
                }
            except Exception as e:
                return {"error": f"? Model analysis failed: {str(e)}"}
        elif action == "compare":
            models = kwargs.get("models", [])
            if not models or len(models) < 2:
                return {"error": "? At least 2 models required for comparison"}
            comparison_results = []
            for model_id in models[:5]:  # Limit to 5 models
                try:
                    model_info_result = model_info(model_id, token=TOKEN)
                    # Same siblings normalization as in "analyze".
                    siblings = list(getattr(model_info_result, 'siblings', None) or [])
                    comparison_results.append({
                        "repo_id": model_id,
                        "info": format_repo_info(model_info_result),
                        "popularity_score": calculate_popularity_score(model_info_result),
                        "model_size": sum(getattr(s, 'size', 0) or 0 for s in siblings),
                        "tags": getattr(model_info_result, 'tags', [])
                    })
                except Exception as e:
                    # Record the failure but keep comparing the rest.
                    comparison_results.append({
                        "repo_id": model_id,
                        "error": f"Failed to analyze: {str(e)}"
                    })
            # Rank the successfully-analyzed models by popularity.
            valid_results = [r for r in comparison_results if "error" not in r]
            valid_results.sort(key=lambda x: x["popularity_score"], reverse=True)
            return {
                "comparison_timestamp": datetime.now().isoformat(),
                "models_compared": len(models),
                "successful_analyses": len(valid_results),
                "results": comparison_results,
                "ranking": [r["repo_id"] for r in valid_results]
            }
        elif action == "test_inference":
            # Inference is opt-in (ENABLE_INFERENCE) and needs a token.
            if not TOKEN or not ENABLE_INFERENCE:
                return {"error": "? Inference testing requires authentication and ENABLE_INFERENCE=true"}
            test_input = kwargs.get("test_input", "Hello, world!")
            try:
                api_client = get_inference_api(repo_id, token=TOKEN)
                if api_client:
                    result = api_client(test_input)
                    return {
                        "repo_id": repo_id,
                        "test_input": test_input,
                        "inference_result": result,
                        "status": "? Inference successful",
                        "timestamp": datetime.now().isoformat()
                    }
                else:
                    return {"error": "? Inference API not available for this model"}
            except Exception as e:
                return {
                    "repo_id": repo_id,
                    "test_input": test_input,
                    "error": f"? Inference failed: {str(e)}",
                    "timestamp": datetime.now().isoformat()
                }
        elif action == "validate_model":
            try:
                model_info_result = model_info(repo_id, token=TOKEN, files_metadata=True)
                validation_results = {
                    "repo_id": repo_id,
                    "validation_timestamp": datetime.now().isoformat(),
                    "checks": {}
                }
                # Essential-file checks (same normalization fix as above).
                siblings = list(getattr(model_info_result, 'siblings', None) or [])
                files = [s.rfilename for s in siblings]
                checks = validation_results["checks"]
                checks["has_model_files"] = any(
                    f.endswith(('.bin', '.safetensors', '.h5', '.onnx')) for f in files
                )
                checks["has_config"] = 'config.json' in files
                checks["has_readme"] = any('README' in f for f in files)
                checks["has_license"] = any('LICENSE' in f for f in files)
                # Model-card metadata checks.
                card_data = getattr(model_info_result, 'cardData', {}) or {}
                checks["has_license_info"] = bool(card_data.get('license'))
                checks["has_pipeline_tag"] = bool(card_data.get('pipeline_tag'))
                checks["has_language_info"] = bool(card_data.get('language'))
                # Score = percentage of passed checks; 70% is the pass bar.
                passed_checks = sum(1 for v in checks.values() if v)
                total_checks = len(checks)
                validation_results["validation_score"] = (passed_checks / total_checks) * 100
                validation_results["validation_status"] = "? Passed" if validation_results["validation_score"] >= 70 else "?? Needs improvement"
                return validation_results
            except Exception as e:
                return {"error": f"? Model validation failed: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: analyze, compare, test_inference, validate_model"}
    return safe_execute(_evaluate_model, f"model_evaluation_{action}")
@mcp.tool()
def hf_space_management(
    action: str,
    space_id: str,
    **kwargs
) -> Dict[str, Any]:
    """Advanced Hugging Face Spaces management
    Actions:
    - runtime_info: Get space runtime information and status
    - restart: Restart a space
    - pause: Pause a space
    - set_sleep_time: Set sleep time for a space (kwargs: sleep_time)
    - duplicate: Duplicate a space (kwargs: to_id)
    """
    def _manage_space() -> Dict[str, Any]:
        # Space helpers are optional imports depending on the installed
        # huggingface_hub version.
        if not HAS_SPACE_MANAGEMENT:
            return {"error": "? Space management features not available in this huggingface_hub version"}
        # All space actions require an authenticated token.
        auth_error = validate_auth("space management")
        if auth_error: return auth_error
        if action == "runtime_info":
            try:
                runtime = get_space_runtime(space_id, token=TOKEN)
                return {
                    "space_id": space_id,
                    "runtime_info": {
                        "stage": runtime.stage,
                        # Attribute names vary between hub versions; getattr
                        # with fallbacks keeps this version-tolerant.
                        "hardware": getattr(runtime, 'hardware', 'unknown'),
                        "requested_hardware": getattr(runtime, 'requested_hardware', 'unknown'),
                        "sleep_time": getattr(runtime, 'sleep_time', None),
                        "raw_runtime": str(runtime)
                    },
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to get runtime info: {str(e)}"}
        elif action == "restart":
            # Mutating action: additionally require write permission.
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            try:
                restart_space(space_id, token=TOKEN)
                return {
                    "space_id": space_id,
                    "status": "? Space restart initiated",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to restart space: {str(e)}"}
        elif action == "pause":
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            try:
                pause_space(space_id, token=TOKEN)
                return {
                    "space_id": space_id,
                    "status": "?? Space paused",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to pause space: {str(e)}"}
        elif action == "set_sleep_time":
            perm_error = validate_permissions(require_write=True)
            if perm_error: return perm_error
            sleep_time = kwargs.get("sleep_time")
            if sleep_time is None:
                return {"error": "? sleep_time parameter required (in seconds)"}
            try:
                set_space_sleep_time(space_id, sleep_time, token=TOKEN)
                return {
                    "space_id": space_id,
                    "sleep_time": sleep_time,
                    "status": f"?? Sleep time set to {sleep_time} seconds",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to set sleep time: {str(e)}"}
        elif action == "duplicate":
            # NOTE(review): unlike restart/pause/set_sleep_time, there is no
            # write-permission check here — confirm whether that is intended.
            to_id = kwargs.get("to_id")
            if not to_id:
                return {"error": "? to_id parameter required for duplication"}
            try:
                # NOTE(review): the returned repo object is unused; the
                # response is built from `to_id` instead.
                new_space = duplicate_space(space_id, to_id, token=TOKEN)
                return {
                    "original_space": space_id,
                    "new_space": to_id,
                    "status": "? Space duplicated successfully",
                    "new_space_url": f"https://huggingface.co/spaces/{to_id}",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to duplicate space: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: runtime_info, restart, pause, set_sleep_time, duplicate"}
    return safe_execute(_manage_space, f"space_management_{action}")
@mcp.tool()
def hf_community_features(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """Community features: likes, discussions, social interactions
    Actions:
    - like: Like a repository
    - unlike: Unlike a repository
    - get_likes: Get user's liked repositories (kwargs: user)
    - create_discussion: Create a discussion (non-PR; kwargs: title, description)
    - get_commits: Get repository commit history
    - get_refs: Get repository branches/tags
    """
    def _community_features() -> Dict[str, Any]:
        if action == "like":
            auth_error = validate_auth("liking repositories")
            if auth_error: return auth_error
            try:
                # Use HfApi directly for broader compatibility
                api.like(repo_id, token=TOKEN, repo_type=repo_type)
                return {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "status": "?? Repository liked",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to like repository: {str(e)}"}
        elif action == "unlike":
            auth_error = validate_auth("unliking repositories")
            if auth_error: return auth_error
            try:
                # Use HfApi directly for broader compatibility
                api.unlike(repo_id, token=TOKEN, repo_type=repo_type)
                return {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "status": "?? Repository unliked",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to unlike repository: {str(e)}"}
        elif action == "get_likes":
            # Gated on optional imports (list_liked_repos etc.).
            if not HAS_ADVANCED_REPO:
                return {"error": "? Advanced repository features not available in this huggingface_hub version"}
            auth_error = validate_auth("getting liked repositories")
            if auth_error: return auth_error
            try:
                # Default to the authenticated user's namespace.
                user = kwargs.get("user") or get_user_namespace()
                # NOTE(review): in current huggingface_hub versions
                # list_liked_repos returns a UserLikes dataclass
                # (.models/.datasets/.spaces), not an iterable of repo
                # objects — verify against the pinned version; if so this
                # branch always lands in the except below.
                liked_repos = list(list_liked_repos(user, token=TOKEN))
                result = {
                    "user": user,
                    "total_likes": len(liked_repos),
                    "liked_repositories": []
                }
                for repo in liked_repos[:50]:  # Limit to 50 for performance
                    result["liked_repositories"].append({
                        "id": repo.id,
                        "author": repo.author,
                        "likes": getattr(repo, 'likes', 0),
                        "downloads": getattr(repo, 'downloads', 0),
                        "created_at": repo.created_at.isoformat() if repo.created_at else None
                    })
                return result
            except Exception as e:
                return {"error": f"? Failed to get liked repositories: {str(e)}"}
        elif action == "create_discussion":
            auth_error = validate_auth("creating discussions")
            if auth_error: return auth_error
            title = kwargs.get("title")
            description = kwargs.get("description", "")
            if not title:
                return {"error": "? title parameter required"}
            try:
                # pull_request=False makes this a plain discussion thread.
                discussion = create_discussion(
                    repo_id=repo_id,
                    title=title,
                    description=description,
                    repo_type=repo_type,
                    pull_request=False,
                    token=TOKEN
                )
                return {
                    "repo_id": repo_id,
                    "discussion_number": discussion.num,
                    "title": discussion.title,
                    "url": discussion.url,
                    "status": "?? Discussion created",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to create discussion: {str(e)}"}
        elif action == "get_commits":
            if not HAS_ADVANCED_REPO:
                return {"error": "? Advanced repository features not available in this huggingface_hub version"}
            try:
                commits = list(list_repo_commits(repo_id, repo_type=repo_type, token=TOKEN))
                result = {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "total_commits": len(commits),
                    "recent_commits": []
                }
                for commit in commits[:20]:  # Show last 20 commits
                    result["recent_commits"].append({
                        "commit_id": commit.commit_id,
                        "title": commit.title,
                        "message": getattr(commit, 'message', ''),
                        # NOTE(review): key is "author" but the value is the
                        # `authors` list — confirm consumers expect a list.
                        "author": getattr(commit, 'authors', []),
                        "created_at": commit.created_at.isoformat() if commit.created_at else None
                    })
                return result
            except Exception as e:
                return {"error": f"? Failed to get commits: {str(e)}"}
        elif action == "get_refs":
            if not HAS_ADVANCED_REPO:
                return {"error": "? Advanced repository features not available in this huggingface_hub version"}
            try:
                refs = list_repo_refs(repo_id, repo_type=repo_type, token=TOKEN)
                # NOTE(review): list_repo_refs returns a GitRefs dataclass
                # (.branches/.tags) in current hub versions, which is not
                # iterable and has no len() — verify; this loop may always
                # raise and be reported via the except below.
                branches = []
                tags = []
                for ref in refs:
                    if ref.ref.startswith('refs/heads/'):
                        branches.append({
                            "name": ref.ref.replace('refs/heads/', ''),
                            "target_commit": ref.target_commit
                        })
                    elif ref.ref.startswith('refs/tags/'):
                        tags.append({
                            "name": ref.ref.replace('refs/tags/', ''),
                            "target_commit": ref.target_commit
                        })
                return {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "branches": branches,
                    "tags": tags,
                    "total_refs": len(refs)
                }
            except Exception as e:
                return {"error": f"? Failed to get refs: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: like, unlike, get_likes, create_discussion, get_commits, get_refs"}
    return safe_execute(_community_features, f"community_{action}")
@mcp.tool()
def hf_dataset_processing(
    action: str,
    dataset_id: str,
    **kwargs
) -> Dict[str, Any]:
    """Advanced dataset processing and analysis tools
    Actions:
    - analyze: Analyze dataset structure, size, and metadata
    - compare: Compare multiple datasets (kwargs: datasets)
    - validate: Validate dataset format and completeness
    """
    def _process_dataset() -> Dict[str, Any]:
        if action == "analyze":
            try:
                # files_metadata=True populates per-file size info on siblings.
                dataset_info_result = dataset_info(dataset_id, token=TOKEN, files_metadata=True)
                card_data = getattr(dataset_info_result, 'cardData', {}) or {}
                # Normalize the sibling list ONCE. The previous per-item
                # guard `for s in X.siblings if hasattr(X, 'siblings')`
                # accessed the attribute before the guard ran, so a missing
                # `siblings` raised instead of defaulting.
                siblings = list(getattr(dataset_info_result, 'siblings', None) or [])
                file_names = [s.rfilename for s in siblings]
                # Group files by extension and accumulate the total size.
                file_analysis = {}
                total_size = 0
                for sibling in siblings:
                    file_ext = Path(sibling.rfilename).suffix.lower()
                    file_size = getattr(sibling, 'size', 0) or 0
                    total_size += file_size
                    entry = file_analysis.setdefault(file_ext, {"count": 0, "total_size": 0, "files": []})
                    entry["count"] += 1
                    entry["total_size"] += file_size
                    entry["files"].append({
                        "name": sibling.rfilename,
                        "size": file_size
                    })
                # Detect the on-disk data format(s) from file extensions.
                dataset_formats = []
                if any(f.endswith('.parquet') for f in file_names):
                    dataset_formats.append("Parquet")
                if any(f.endswith(('.json', '.jsonl')) for f in file_names):
                    dataset_formats.append("JSON/JSONL")
                if any(f.endswith('.csv') for f in file_names):
                    dataset_formats.append("CSV")
                if any(f.endswith('.arrow') for f in file_names):
                    dataset_formats.append("Arrow")
                return {
                    "dataset_id": dataset_id,
                    "analysis_timestamp": datetime.now().isoformat(),
                    "basic_info": format_repo_info(dataset_info_result),
                    "dataset_card": format_model_card_data(card_data),
                    "file_analysis": file_analysis,
                    "total_size_bytes": total_size,
                    "total_size_mb": round(total_size / 1024 / 1024, 2),
                    "detected_formats": dataset_formats,
                    "popularity_score": calculate_popularity_score(dataset_info_result),
                    "metadata": {
                        "has_dataset_info": any('dataset_info' in f for f in file_names),
                        "has_readme": any('README.md' in f for f in file_names),
                        "has_config": any('config.json' in f for f in file_names),
                        "file_count": len(file_names)
                    }
                }
            except Exception as e:
                return {"error": f"? Dataset analysis failed: {str(e)}"}
        elif action == "compare":
            datasets = kwargs.get("datasets", [])
            if not datasets or len(datasets) < 2:
                return {"error": "? At least 2 datasets required for comparison"}
            comparison_results = []
            for dataset_id_cmp in datasets[:5]:  # Limit to 5 datasets
                try:
                    dataset_info_result = dataset_info(dataset_id_cmp, token=TOKEN)
                    # Same siblings normalization fix as in "analyze".
                    siblings = list(getattr(dataset_info_result, 'siblings', None) or [])
                    total_size = sum(getattr(s, 'size', 0) or 0 for s in siblings)
                    comparison_results.append({
                        "dataset_id": dataset_id_cmp,
                        "info": format_repo_info(dataset_info_result),
                        "popularity_score": calculate_popularity_score(dataset_info_result),
                        "total_size": total_size,
                        "tags": getattr(dataset_info_result, 'tags', [])
                    })
                except Exception as e:
                    # Record the failure but keep comparing the rest.
                    comparison_results.append({
                        "dataset_id": dataset_id_cmp,
                        "error": f"Failed to analyze: {str(e)}"
                    })
            # Rank the successfully-analyzed datasets by popularity.
            valid_results = [r for r in comparison_results if "error" not in r]
            valid_results.sort(key=lambda x: x["popularity_score"], reverse=True)
            return {
                "comparison_timestamp": datetime.now().isoformat(),
                "datasets_compared": len(datasets),
                "successful_analyses": len(valid_results),
                "results": comparison_results,
                "ranking": [r["dataset_id"] for r in valid_results]
            }
        elif action == "validate":
            try:
                dataset_info_result = dataset_info(dataset_id, token=TOKEN, files_metadata=True)
                validation_results = {
                    "dataset_id": dataset_id,
                    "validation_timestamp": datetime.now().isoformat(),
                    "checks": {}
                }
                # Essential-file checks (normalized siblings, as above).
                siblings = list(getattr(dataset_info_result, 'siblings', None) or [])
                files = [s.rfilename for s in siblings]
                checks = validation_results["checks"]
                checks["has_data_files"] = any(
                    f.endswith(('.parquet', '.json', '.jsonl', '.csv', '.arrow')) for f in files
                )
                checks["has_readme"] = any('README' in f for f in files)
                checks["has_license"] = any('LICENSE' in f for f in files)
                checks["has_dataset_info"] = any('dataset_info' in f for f in files)
                # Dataset-card metadata checks.
                card_data = getattr(dataset_info_result, 'cardData', {}) or {}
                checks["has_license_info"] = bool(card_data.get('license'))
                checks["has_language_info"] = bool(card_data.get('language'))
                checks["has_task_info"] = bool(card_data.get('task_categories'))
                # Score = percentage of passed checks; 70% is the pass bar.
                passed_checks = sum(1 for v in checks.values() if v)
                total_checks = len(checks)
                validation_results["validation_score"] = (passed_checks / total_checks) * 100
                validation_results["validation_status"] = "? Passed" if validation_results["validation_score"] >= 70 else "?? Needs improvement"
                return validation_results
            except Exception as e:
                return {"error": f"? Dataset validation failed: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: analyze, compare, validate"}
    return safe_execute(_process_dataset, f"dataset_processing_{action}")
@mcp.tool()
def hf_license_management(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """License management and compliance tools.

    Actions:
    - check_license: Check repository license information
    - validate_compliance: Validate license compliance
    - suggest_license: Suggest appropriate license based on preferences

    kwargs (suggest_license only): content_type ("model"/"dataset"),
    commercial_use (bool), derivatives_allowed (bool), share_alike (bool).

    Returns an action-specific dict, or {"error": ...} on failure.
    """
    def _manage_license():
        if action == "check_license":
            try:
                # Fetch repository metadata including the file listing.
                if repo_type == "model":
                    repo_info = model_info(repo_id, token=TOKEN, files_metadata=True)
                elif repo_type == "dataset":
                    repo_info = dataset_info(repo_id, token=TOKEN, files_metadata=True)
                else:
                    return {"error": f"? Unsupported repo_type: {repo_type}"}
                # A license can be declared in the card metadata ...
                card_data = getattr(repo_info, 'cardData', {}) or {}
                license_from_card = card_data.get('license')
                # ... and/or shipped as a LICENSE/LICENCE file.
                files = [s.rfilename for s in repo_info.siblings] if hasattr(repo_info, 'siblings') else []
                license_files = [f for f in files if 'LICENSE' in f.upper() or 'LICENCE' in f.upper()]
                # Human-readable names for common license identifiers.
                common_licenses = {
                    'apache-2.0': 'Apache License 2.0',
                    'mit': 'MIT License',
                    'gpl-3.0': 'GNU General Public License v3.0',
                    'bsd-3-clause': 'BSD 3-Clause License',
                    'cc-by-4.0': 'Creative Commons Attribution 4.0',
                    'cc-by-sa-4.0': 'Creative Commons Attribution-ShareAlike 4.0',
                    'openrail': 'OpenRAIL License',
                    'other': 'Other/Custom License'
                }
                return {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "license_info": {
                        "license_from_card": license_from_card,
                        "license_display_name": common_licenses.get(license_from_card, license_from_card),
                        "license_files": license_files,
                        "has_license_file": len(license_files) > 0,
                        "has_license_in_card": bool(license_from_card)
                    },
                    # "Complete" requires both a card declaration and a license file.
                    "compliance_status": "? Complete" if license_from_card and license_files else "?? Incomplete",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? License check failed: {str(e)}"}
        elif action == "validate_compliance":
            try:
                # Reuse check_license so both actions share one source of truth.
                license_info = hf_license_management("check_license", repo_id, repo_type)
                if "error" in license_info:
                    return license_info
                compliance_issues = []
                score = 100
                # Missing card metadata carries the biggest penalty.
                if not license_info["license_info"]["has_license_in_card"]:
                    compliance_issues.append("? No license specified in model card")
                    score -= 30
                if not license_info["license_info"]["has_license_file"]:
                    compliance_issues.append("? No LICENSE file found")
                    score -= 20
                license_type = license_info["license_info"]["license_from_card"]
                if license_type == "other":
                    compliance_issues.append("?? Custom license - manual review recommended")
                    score -= 10
                # Clamp once and use the same value for score and grade.
                final_score = max(0, score)
                # Drop None placeholders so consumers never see empty entries.
                recommendations = [r for r in [
                    "Add license to model card metadata" if not license_info["license_info"]["has_license_in_card"] else None,
                    "Add LICENSE file to repository" if not license_info["license_info"]["has_license_file"] else None,
                    "Consider using standard open source license" if license_type == "other" else None
                ] if r]
                return {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "compliance_score": final_score,
                    "compliance_grade": "A" if final_score >= 90 else "B" if final_score >= 70 else "C" if final_score >= 50 else "D",
                    "issues": compliance_issues,
                    "recommendations": recommendations,
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Compliance validation failed: {str(e)}"}
        elif action == "suggest_license":
            # Preferences default to the most permissive combination.
            content_type = kwargs.get("content_type", "model")
            commercial_use = kwargs.get("commercial_use", True)
            derivatives_allowed = kwargs.get("derivatives_allowed", True)
            share_alike = kwargs.get("share_alike", False)
            suggestions = []
            if commercial_use and derivatives_allowed and not share_alike:
                suggestions.append({
                    "license": "apache-2.0",
                    "name": "Apache License 2.0",
                    "description": "Permissive license with patent grant",
                    "best_for": "Commercial projects, wide adoption"
                })
                suggestions.append({
                    "license": "mit",
                    "name": "MIT License",
                    "description": "Simple permissive license",
                    "best_for": "Simple projects, maximum freedom"
                })
            if content_type == "dataset":
                suggestions.append({
                    "license": "cc-by-4.0",
                    "name": "Creative Commons Attribution 4.0",
                    "description": "Free use with attribution",
                    "best_for": "Datasets, educational content"
                })
            if content_type == "model" and not commercial_use:
                suggestions.append({
                    "license": "openrail",
                    "name": "OpenRAIL License",
                    "description": "Responsible AI license with use restrictions",
                    "best_for": "AI models with ethical constraints"
                })
            return {
                "content_type": content_type,
                "preferences": {
                    "commercial_use": commercial_use,
                    "derivatives_allowed": derivatives_allowed,
                    "share_alike": share_alike
                },
                "suggestions": suggestions,
                "timestamp": datetime.now().isoformat()
            }
        else:
            return {"error": f"? Invalid action: {action}. Use: check_license, validate_compliance, suggest_license"}
    return safe_execute(_manage_license, f"license_management_{action}")
@mcp.tool()
def hf_inference_tools(
    action: str,
    repo_id: str,
    **kwargs
) -> Dict[str, Any]:
    """Advanced inference and model testing tools.

    Actions:
    - test_inference: Test model inference with custom inputs
      (kwargs: inputs — str or list of str, parameters — dict)
    - check_endpoints: Check available inference endpoints

    Requires authentication (TOKEN) and ENABLE_INFERENCE=true.
    """
    def _inference_tools():
        if not HAS_INFERENCE_API:
            return {"error": "? Inference API features not available in this huggingface_hub version"}
        if not TOKEN or not ENABLE_INFERENCE:
            return {"error": "? Inference requires authentication and ENABLE_INFERENCE=true"}
        if action == "test_inference":
            inputs = kwargs.get("inputs", ["Hello world"])
            parameters = kwargs.get("parameters", {})
            # Accept a bare string as a single-item batch.
            if isinstance(inputs, str):
                inputs = [inputs]
            if not inputs:
                # Guard: an empty list would make the success-rate division fail.
                return {"error": "? inputs must be a non-empty string or list"}
            try:
                api_client = get_inference_api(repo_id, token=TOKEN)
                if not api_client:
                    return {"error": "? Inference API not available for this model"}
                results = []
                processed = inputs[:10]  # Cap the workload at 10 inputs
                for i, input_text in enumerate(processed):
                    try:
                        result = api_client(input_text, parameters=parameters)
                        results.append({
                            "input_index": i,
                            "input": input_text,
                            "output": result,
                            "status": "success"
                        })
                    except Exception as e:
                        # Per-input failures are recorded, not fatal.
                        results.append({
                            "input_index": i,
                            "input": input_text,
                            "error": str(e),
                            "status": "failed"
                        })
                successful_inferences = len([r for r in results if r["status"] == "success"])
                return {
                    "repo_id": repo_id,
                    "total_inputs": len(inputs),
                    "processed_inputs": len(processed),
                    "successful_inferences": successful_inferences,
                    # Rate over what was actually processed (the list may have
                    # been truncated to 10 items above).
                    "success_rate": (successful_inferences / len(processed)) * 100,
                    "results": results,
                    "parameters_used": parameters,
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Inference test failed: {str(e)}"}
        elif action == "check_endpoints":
            try:
                # Dedicated endpoints require a newer HfApi; fall back gracefully.
                try:
                    endpoints = api.list_inference_endpoints(token=TOKEN)
                except AttributeError:
                    endpoints = []
                # Keep only endpoints serving this repository.
                model_endpoints = []
                for endpoint in endpoints:
                    if hasattr(endpoint, 'repository') and endpoint.repository == repo_id:
                        model_endpoints.append({
                            "name": endpoint.name,
                            "status": getattr(endpoint, 'status', 'unknown'),
                            "compute": getattr(endpoint, 'compute', 'unknown'),
                            "url": getattr(endpoint, 'url', None)
                        })
                return {
                    "repo_id": repo_id,
                    "dedicated_endpoints": model_endpoints,
                    "serverless_available": bool(get_inference_api(repo_id, token=TOKEN)),
                    "total_user_endpoints": len(endpoints),
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Failed to check endpoints: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: test_inference, check_endpoints"}
    return safe_execute(_inference_tools, f"inference_{action}")
@mcp.tool()
def hf_ai_workflow_tools(
    action: str,
    **kwargs
) -> Dict[str, Any]:
    """Specialized AI workflow and automation tools.

    Actions:
    - create_model_card: Generate comprehensive model cards
      (kwargs: repo_id [required], model_type, language, license, datasets, metrics)
    - bulk_operations: Perform bulk operations across repositories
      (kwargs: repo_list [required], operation — info/like/validate)
    - generate_readme: Generate README files for repositories
      (kwargs: repo_id [required], repo_type — model/dataset)
    - validate_pipeline: Validate complete ML pipelines
      (kwargs: components [required] — list of repo_ids)
    """
    def _workflow_tools():
        if action == "create_model_card":
            repo_id = kwargs.get("repo_id")
            if not repo_id:
                return {"error": "? repo_id parameter required"}
            model_type = kwargs.get("model_type", "text-generation")
            language = kwargs.get("language", ["en"])
            license_type = kwargs.get("license", "apache-2.0")
            datasets = kwargs.get("datasets", [])
            metrics = kwargs.get("metrics", [])
            # YAML front matter followed by a markdown body. Template lines
            # are flush-left on purpose so the generated card is clean.
            model_card_content = f"""---
license: {license_type}
language: {language if isinstance(language, list) else [language]}
pipeline_tag: {model_type}
tags:
- {model_type}
- transformers
- ai
datasets: {datasets if datasets else []}
metrics: {metrics if metrics else []}
---
# {repo_id}
## Model Description
This is a {model_type} model designed for high-performance inference and applications.
### Model Architecture
- **Model Type**: {model_type}
- **Language(s)**: {', '.join(language) if isinstance(language, list) else language}
- **License**: {license_type}
### Intended Use
This model is intended for:
- Research and development
- Educational purposes
- Commercial applications (subject to license terms)
### Training Data
{"Training datasets: " + ", ".join(datasets) if datasets else "Training data information not specified."}
### Performance Metrics
{"Performance metrics: " + ", ".join(metrics) if metrics else "Performance metrics not specified."}
### Usage
```python
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("{repo_id}")
model = AutoModel.from_pretrained("{repo_id}")
# Example usage
inputs = tokenizer("Hello, world!", return_tensors="pt")
outputs = model(**inputs)
```
### Limitations and Bias
Please refer to the model's limitations and potential biases before deployment.
### Citation
If you use this model, please cite:
```
@misc{{{repo_id.replace('/', '_')},
title={{{repo_id}}},
author={{Model Author}},
year={{2024}},
url={{https://huggingface.co/{repo_id}}}
}}
```
### Contact
For questions and support, please contact the model authors.
"""
            return {
                "repo_id": repo_id,
                "model_card_content": model_card_content,
                "metadata": {
                    "license": license_type,
                    "language": language,
                    "model_type": model_type,
                    "datasets": datasets,
                    "metrics": metrics
                },
                "status": "? Model card generated",
                "timestamp": datetime.now().isoformat()
            }
        elif action == "bulk_operations":
            repo_list = kwargs.get("repo_list", [])
            operation = kwargs.get("operation", "info")
            if not repo_list:
                return {"error": "? repo_list parameter required"}
            results = []
            for repo_id in repo_list[:10]:  # Cap bulk work at 10 repositories
                try:
                    # Dispatch each repository to the matching sibling tool.
                    if operation == "info":
                        result = hf_repository_manager("info", repo_id)
                    elif operation == "like":
                        result = hf_community_features("like", repo_id)
                    elif operation == "validate":
                        result = hf_model_evaluation("validate_model", repo_id)
                    else:
                        result = {"error": f"Unsupported operation: {operation}"}
                    results.append({
                        "repo_id": repo_id,
                        "operation": operation,
                        "result": result,
                        "status": "success" if "error" not in result else "failed"
                    })
                except Exception as e:
                    results.append({
                        "repo_id": repo_id,
                        "operation": operation,
                        "error": str(e),
                        "status": "failed"
                    })
            successful = len([r for r in results if r["status"] == "success"])
            return {
                "operation": operation,
                "total_repos": len(repo_list),
                "processed_repos": len(results),
                "successful_operations": successful,
                "success_rate": (successful / len(results)) * 100 if results else 0,
                "results": results,
                "timestamp": datetime.now().isoformat()
            }
        elif action == "generate_readme":
            repo_id = kwargs.get("repo_id")
            repo_type = kwargs.get("repo_type", "model")
            if not repo_id:
                return {"error": "? repo_id parameter required"}
            try:
                # Base the README on an automated repository analysis.
                if repo_type == "model":
                    analysis = hf_model_evaluation("analyze", repo_id)
                elif repo_type == "dataset":
                    analysis = hf_dataset_processing("analyze", repo_id)
                else:
                    return {"error": f"? Unsupported repo_type: {repo_type}"}
                if "error" in analysis:
                    return analysis
                basic_info = analysis.get("basic_info", {})
                # BUGFIX: the old template applied the thousands-separator
                # format spec ":," directly to basic_info.get('downloads',
                # 'N/A'), which raises ValueError when the value is the
                # string 'N/A'. Format numerically only when it is an int.
                downloads = basic_info.get('downloads')
                downloads_display = f"{downloads:,}" if isinstance(downloads, int) else "N/A"
                # BUGFIX: the old usage section concatenated
                # "AutoTokenizer" with the next fragment on one line,
                # producing broken import code in the generated README.
                readme_content = f"""# {repo_id}
## Overview
{basic_info.get('id', repo_id)} is a {'machine learning model' if repo_type == 'model' else 'dataset'} hosted on Hugging Face.
### Quick Stats
- **Downloads**: {downloads_display}
- **Likes**: {basic_info.get('likes', 'N/A')}
- **Created**: {basic_info.get('created_at', 'N/A')}
- **Last Modified**: {basic_info.get('last_modified', 'N/A')}
### Files
This repository contains the following files:
{chr(10).join([f"- `{file}`" for file in basic_info.get('files', [])[:10]])}
### Usage
```python
# Installation
pip install transformers
# Usage example
{'from transformers import AutoTokenizer, AutoModel' if repo_type == 'model' else '# Load your dataset'}
{'tokenizer = AutoTokenizer.from_pretrained("' + repo_id + '")' if repo_type == 'model' else '# Load dataset'}
{'model = AutoModel.from_pretrained("' + repo_id + '")' if repo_type == 'model' else ''}
```
### License
Please check the repository for license information.
### Citation
```
@misc{{{repo_id.replace('/', '_')},
title={{{repo_id}}},
url={{https://huggingface.co/{repo_id}}},
year={{2024}}
}}
```
---
*This README was auto-generated by HuggingMCP.*
"""
                return {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "readme_content": readme_content,
                    "status": "? README generated",
                    "timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? README generation failed: {str(e)}"}
        elif action == "validate_pipeline":
            pipeline_components = kwargs.get("components", [])
            if not pipeline_components:
                return {"error": "? components parameter required (list of repo_ids)"}
            validation_results = []
            for component in pipeline_components:
                try:
                    # Each component must pass model validation on its own.
                    result = hf_model_evaluation("validate_model", component)
                    validation_results.append({
                        "component": component,
                        "validation": result,
                        # 70 is the pass threshold used by model validation.
                        "status": "valid" if result.get("validation_score", 0) >= 70 else "invalid"
                    })
                except Exception as e:
                    validation_results.append({
                        "component": component,
                        "error": str(e),
                        "status": "error"
                    })
            valid_components = len([r for r in validation_results if r["status"] == "valid"])
            return {
                "pipeline_validation": {
                    "total_components": len(pipeline_components),
                    "valid_components": valid_components,
                    "pipeline_health": (valid_components / len(pipeline_components)) * 100,
                    "pipeline_status": "? Healthy" if valid_components == len(pipeline_components) else "?? Needs attention"
                },
                "component_results": validation_results,
                "timestamp": datetime.now().isoformat()
            }
        else:
            return {"error": f"? Invalid action: {action}. Use: create_model_card, bulk_operations, generate_readme, validate_pipeline"}
    return safe_execute(_workflow_tools, f"workflow_{action}")
@mcp.tool()
def hf_advanced_analytics(
    action: str,
    **kwargs
) -> Dict[str, Any]:
    """Advanced analytics and insights for HuggingFace repositories.

    Actions:
    - trending_analysis: Analyze trending models/datasets
      (kwargs: content_type — models/datasets, limit, time_period)
    - recommendation_engine: Recommend repositories based on criteria
      (kwargs: preferences — dict with tags/authors/min_downloads,
       content_type, limit)
    """
    def _analytics():
        if action == "trending_analysis":
            content_type = kwargs.get("content_type", "models")
            limit = kwargs.get("limit", 50)
            time_period = kwargs.get("time_period", "week")  # echoed in the report only
            try:
                # Sample the most-downloaded repositories as the trend basis.
                if content_type == "models":
                    repos = list(list_models(limit=limit, sort="downloads", direction=-1, token=TOKEN))
                elif content_type == "datasets":
                    repos = list(list_datasets(limit=limit, token=TOKEN))
                else:
                    return {"error": f"? Unsupported content_type: {content_type}"}
                trending_data = []
                for repo in repos:
                    popularity_score = calculate_popularity_score(repo)
                    trending_data.append({
                        "repo_id": repo.id,
                        "author": repo.author,
                        "downloads": getattr(repo, 'downloads', 0),
                        "likes": getattr(repo, 'likes', 0),
                        "popularity_score": popularity_score,
                        "created_at": repo.created_at.isoformat() if repo.created_at else None,
                        "tags": getattr(repo, 'tags', [])[:5]
                    })
                trending_data.sort(key=lambda x: x["popularity_score"], reverse=True)
                total_downloads = sum(item["downloads"] for item in trending_data)
                total_likes = sum(item["likes"] for item in trending_data)
                # Count tag occurrences across the sample.
                tag_frequency = defaultdict(int)
                for item in trending_data:
                    for tag in item["tags"]:
                        tag_frequency[tag] += 1
                top_tags = sorted(tag_frequency.items(), key=lambda x: x[1], reverse=True)[:10]
                return {
                    "content_type": content_type,
                    "time_period": time_period,
                    "analysis_timestamp": datetime.now().isoformat(),
                    "summary": {
                        "total_repositories": len(trending_data),
                        "total_downloads": total_downloads,
                        "total_likes": total_likes,
                        "average_popularity": sum(item["popularity_score"] for item in trending_data) / len(trending_data) if trending_data else 0
                    },
                    "top_repositories": trending_data[:20],
                    "trending_tags": [{"tag": tag, "frequency": freq} for tag, freq in top_tags],
                    "insights": {
                        "most_popular": trending_data[0]["repo_id"] if trending_data else None,
                        # Author with the most repositories in the sample.
                        "top_author": max(set(item["author"] for item in trending_data),
                                          key=lambda x: sum(1 for item in trending_data if item["author"] == x)) if trending_data else None
                    }
                }
            except Exception as e:
                return {"error": f"? Trending analysis failed: {str(e)}"}
        elif action == "recommendation_engine":
            user_preferences = kwargs.get("preferences", {})
            content_type = kwargs.get("content_type", "models")
            limit = kwargs.get("limit", 20)
            preferred_tags = user_preferences.get("tags", [])
            preferred_authors = user_preferences.get("authors", [])
            min_downloads = user_preferences.get("min_downloads", 100)
            # NOTE(review): a "max_size_mb" preference used to be read here but
            # was never applied — listing results do not expose repository
            # size, so size filtering is not supported.
            try:
                # Over-fetch so there is room to filter down to `limit` results.
                if content_type == "models":
                    all_repos = list(list_models(limit=limit*3, token=TOKEN))
                else:
                    all_repos = list(list_datasets(limit=limit*3, token=TOKEN))
                recommendations = []
                for repo in all_repos:
                    score = 0
                    downloads = getattr(repo, 'downloads', 0)
                    # Skip repositories below the popularity floor.
                    if downloads < min_downloads:
                        continue
                    repo_tags = getattr(repo, 'tags', [])
                    # Tag overlap is the strongest signal: 10 points per match.
                    tag_matches = len(set(preferred_tags) & set(repo_tags))
                    score += tag_matches * 10
                    if repo.author in preferred_authors:
                        score += 20
                    # Popularity boost, capped at 50 points for downloads.
                    score += min(downloads / 1000, 50)
                    score += getattr(repo, 'likes', 0) * 0.1
                    recommendations.append({
                        "repo_id": repo.id,
                        "author": repo.author,
                        "downloads": downloads,
                        "likes": getattr(repo, 'likes', 0),
                        "tags": repo_tags[:5],
                        "recommendation_score": score,
                        "match_reasons": {
                            "tag_matches": tag_matches,
                            "preferred_author": repo.author in preferred_authors,
                            "popularity": downloads
                        }
                    })
                recommendations.sort(key=lambda x: x["recommendation_score"], reverse=True)
                return {
                    "content_type": content_type,
                    "user_preferences": user_preferences,
                    "total_candidates": len(all_repos),
                    "qualified_recommendations": len(recommendations),
                    "top_recommendations": recommendations[:limit],
                    "recommendation_timestamp": datetime.now().isoformat()
                }
            except Exception as e:
                return {"error": f"? Recommendation engine failed: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: trending_analysis, recommendation_engine"}
    return safe_execute(_analytics, f"analytics_{action}")
@mcp.tool()
def hf_repository_utilities(
    action: str,
    repo_id: str,
    repo_type: str = "model",
    **kwargs
) -> Dict[str, Any]:
    """Advanced repository utilities and management tools.

    Actions:
    - repository_health: Comprehensive repository health check
    - backup_info: Create comprehensive backup information
    """
    def _repo_utilities():
        if action == "repository_health":
            try:
                health_report = {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "health_check_timestamp": datetime.now().isoformat(),
                    "checks": {},
                    "recommendations": [],
                    "overall_score": 0
                }
                # Fetch repository metadata including the file listing.
                if repo_type == "model":
                    repo_info = model_info(repo_id, token=TOKEN, files_metadata=True)
                elif repo_type == "dataset":
                    repo_info = dataset_info(repo_id, token=TOKEN, files_metadata=True)
                else:
                    return {"error": f"? Unsupported repo_type: {repo_type}"}
                # Check 1: basic card metadata.
                card_data = getattr(repo_info, 'cardData', {}) or {}
                health_report["checks"]["has_license"] = bool(card_data.get('license'))
                health_report["checks"]["has_description"] = bool(getattr(repo_info, 'description', ''))
                health_report["checks"]["has_tags"] = len(getattr(repo_info, 'tags', [])) > 0
                # Check 2: files. BUGFIX: resolve siblings once with a safe
                # default — the old code used `hasattr(repo_info, 'siblings')`
                # as a per-item generator filter, which still raised
                # AttributeError when the attribute was missing.
                siblings = getattr(repo_info, 'siblings', None) or []
                files = [s.rfilename for s in siblings]
                health_report["checks"]["has_readme"] = any('README' in f for f in files)
                health_report["checks"]["has_model_files"] = any(f.endswith(('.bin', '.safetensors', '.h5', '.onnx', '.parquet')) for f in files)
                health_report["checks"]["file_count_reasonable"] = 1 <= len(files) <= 100
                # Check 3: size and organization.
                total_size = sum(getattr(s, 'size', 0) or 0 for s in siblings)
                health_report["checks"]["reasonable_size"] = total_size < MAX_FILE_SIZE * 10  # 10x normal limit
                # Check 4: community engagement.
                downloads = getattr(repo_info, 'downloads', 0)
                likes = getattr(repo_info, 'likes', 0)
                health_report["checks"]["has_community_interest"] = downloads > 50 or likes > 5
                # Overall score = fraction of passed checks.
                passed_checks = sum(1 for v in health_report["checks"].values() if v)
                total_checks = len(health_report["checks"])
                health_report["overall_score"] = (passed_checks / total_checks) * 100
                # Actionable recommendations for each failed check.
                if not health_report["checks"]["has_license"]:
                    health_report["recommendations"].append("Add license information to model card")
                if not health_report["checks"]["has_readme"]:
                    health_report["recommendations"].append("Add README.md file with documentation")
                if not health_report["checks"]["has_tags"]:
                    health_report["recommendations"].append("Add relevant tags for better discoverability")
                if not health_report["checks"]["has_community_interest"]:
                    health_report["recommendations"].append("Promote repository to increase visibility")
                score = health_report["overall_score"]
                health_report["health_grade"] = "A" if score >= 90 else "B" if score >= 75 else "C" if score >= 60 else "D"
                health_report["health_status"] = "? Excellent" if score >= 90 else "?? Good" if score >= 75 else "?? Fair" if score >= 60 else "? Needs improvement"
                return health_report
            except Exception as e:
                return {"error": f"? Repository health check failed: {str(e)}"}
        elif action == "backup_info":
            try:
                if repo_type == "model":
                    repo_info = model_info(repo_id, token=TOKEN, files_metadata=True)
                elif repo_type == "dataset":
                    repo_info = dataset_info(repo_id, token=TOKEN, files_metadata=True)
                else:
                    return {"error": f"? Unsupported repo_type: {repo_type}"}
                # Commit history and refs are optional extras.
                commits = []
                refs = []
                if HAS_ADVANCED_REPO:
                    try:
                        commits = list(list_repo_commits(repo_id, repo_type=repo_type, token=TOKEN))
                        # NOTE(review): list_repo_refs returns a GitRefs
                        # object in recent huggingface_hub versions — confirm
                        # it is iterable here, otherwise the except resets it.
                        refs = list_repo_refs(repo_id, repo_type=repo_type, token=TOKEN)
                    except Exception:
                        pass  # Gracefully handle if features aren't available
                # Resolve siblings once with a safe default (see health check).
                siblings = getattr(repo_info, 'siblings', None) or []
                backup_info = {
                    "repo_id": repo_id,
                    "repo_type": repo_type,
                    "backup_timestamp": datetime.now().isoformat(),
                    "repository_metadata": {
                        "basic_info": format_repo_info(repo_info),
                        "card_data": getattr(repo_info, 'cardData', {}),
                        "description": getattr(repo_info, 'description', ''),
                        "tags": getattr(repo_info, 'tags', []),
                        "private": getattr(repo_info, 'private', False)
                    },
                    "file_structure": [
                        {
                            "filename": s.rfilename,
                            "size": getattr(s, 'size', 0),
                            # getattr with a default already covers the
                            # missing-attribute case; no hasattr needed.
                            "lfs": getattr(s, 'lfs', False)
                        } for s in siblings
                    ],
                    "commit_history": [
                        {
                            "commit_id": c.commit_id,
                            "title": c.title,
                            "message": getattr(c, 'message', ''),
                            "created_at": c.created_at.isoformat() if c.created_at else None
                        } for c in commits[:20]  # Cap history at 20 commits
                    ],
                    "references": [
                        {
                            "ref": r.ref,
                            "target_commit": r.target_commit
                        } for r in refs
                    ],
                    "statistics": {
                        "total_files": len(siblings),
                        "total_size": sum(getattr(s, 'size', 0) or 0 for s in siblings),
                        "total_commits": len(commits),
                        "total_refs": len(refs)
                    }
                }
                return backup_info
            except Exception as e:
                return {"error": f"? Backup info creation failed: {str(e)}"}
        else:
            return {"error": f"? Invalid action: {action}. Use: repository_health, backup_info"}
    return safe_execute(_repo_utilities, f"repo_utilities_{action}")
# =============================================================================
# SERVER STARTUP WITH ENHANCED DEBUGGING
# =============================================================================
def main():
    """Main server startup with comprehensive error handling"""
    try:
        # Announce configuration to stderr before anything that can fail.
        startup_messages = (
            "?? Starting HuggingMCP server...",
            f" ?? Script location: {__file__}",
            f" ?? Authenticated: {'?' if TOKEN else '?'}",
            f" ?? Admin Mode: {'?' if ADMIN_MODE else '?'}",
            f" ?? Read Only: {'?' if READ_ONLY else '?'}",
            " ??? Commands: 18+ comprehensive tools available",
            " ?? New: Model evaluation, dataset processing, AI workflows",
            " ?? New: Community features, license management, analytics",
        )
        for message in startup_messages:
            debug_stderr(message)
        # Bail out early if the MCP object is not actually runnable.
        if not hasattr(mcp, 'run'):
            debug_stderr("? MCP server missing run method", "ERROR")
            sys.exit(1)
        debug_stderr("? All components validated, starting server...")
        # Blocks until the server shuts down.
        mcp.run()
    except KeyboardInterrupt:
        debug_stderr("?? Server stopped by user (Ctrl+C)")
        sys.exit(0)
    except Exception as e:
        # Log the failure to both stderr and the file logger, then exit non-zero.
        debug_stderr(f"?? Fatal error during server startup: {e}", "ERROR")
        debug_stderr(f"?? Traceback: {traceback.format_exc()}", "ERROR")
        logger.error(f"Fatal server error: {e}")
        logger.error(traceback.format_exc())
        sys.exit(1)
# Script entry point: note the direct execution on stderr, then start the server.
if __name__ == "__main__":
    debug_stderr("?? HuggingMCP script executed as main module")
    main()