Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Hugging Face Spaces Demo for OpenProblems Spatial Transcriptomics MCP Server | |
| This is a demo version adapted for HF Spaces deployment that showcases | |
| the MCP server capabilities in a user-friendly Gradio interface. | |
| """ | |
| import gradio as gr | |
| import json | |
| import os | |
| from typing import Dict, Any, List | |
class MockMCPServer:
    """Mock MCP server for HF Spaces demo (without external tool dependencies).

    Mirrors the tool/resource surface of the real OpenProblems spatial
    transcriptomics MCP server, but returns canned JSON results so the
    Gradio Space can run without Nextflow, Viash or Docker installed.
    """

    def __init__(self):
        # Names and one-line descriptions of the tools the real server exposes;
        # rendered verbatim in the "AI Agent Integration" tab.
        self.tools_info = {
            "check_environment": "Check if bioinformatics tools are available",
            "validate_nextflow_config": "Validate Nextflow pipeline syntax",
            "run_nextflow_workflow": "Execute Nextflow workflows",
            "run_viash_component": "Run Viash components",
            "build_docker_image": "Build Docker containers",
            "analyze_nextflow_log": "Analyze pipeline execution logs",
            "read_file": "Read file contents",
            "write_file": "Write files",
            "list_directory": "List directory contents",
            "list_available_tools": "List all MCP tools",
            "echo_test": "Test MCP connectivity"
        }
        # Resource URI -> description of the knowledge resources served.
        self.resources_info = {
            "server://status": "MCP server status and capabilities",
            "documentation://nextflow": "Nextflow best practices",
            "documentation://viash": "Viash component guidelines",
            "documentation://docker": "Docker optimization tips",
            "templates://spatial-workflows": "Spatial transcriptomics templates"
        }

    def check_environment(self, tools_to_check: str = "nextflow,viash,docker,java") -> str:
        """Mock environment check for HF Spaces.

        Args:
            tools_to_check: Comma-separated tool names to "probe".

        Returns:
            A pretty-printed JSON string describing simulated tool availability.
        """
        # Drop empty entries so inputs like "nextflow,,docker" or a trailing
        # comma don't produce bogus "" tool rows in the report.
        tools = [tool.strip() for tool in tools_to_check.split(",") if tool.strip()]
        # Simulate environment check results — nothing is actually probed here.
        results = {
            "environment_check": {
                "timestamp": "2024-01-20T10:30:00Z",
                "platform": "Hugging Face Spaces (Ubuntu 20.04)",
                "python_version": "3.10.14"
            },
            "tools_status": {},
            "recommendations": []
        }
        # Mock results for demo: every tool reports unavailable by design.
        for tool in tools:
            if tool == "docker":
                results["tools_status"][tool] = {
                    "available": False,
                    "version": None,
                    "status": "Not available in HF Spaces environment",
                    "required_for": "Container-based workflows"
                }
                results["recommendations"].append(f"For production: Install {tool} on your local system")
            else:
                results["tools_status"][tool] = {
                    "available": False,
                    "version": None,
                    "status": "Demo environment - tools not installed",
                    "install_command": f"Install with: curl -s https://get.{tool}.io | bash" if tool in ["nextflow", "viash"] else "sudo apt install openjdk-17-jre-headless"
                }
        results["summary"] = f"Demo mode: {len(tools)} tools checked, 0 available (expected in HF Spaces)"
        results["note"] = "This is a demo environment. In production, install tools locally for full functionality."
        return json.dumps(results, indent=2)

    def validate_nextflow_config(self, pipeline_content: str) -> str:
        """Mock Nextflow validation for demo.

        Performs lightweight, purely textual analysis (substring counts and
        pattern checks) of the supplied pipeline source.

        Args:
            pipeline_content: Raw text of a Nextflow pipeline.

        Returns:
            A pretty-printed JSON string with the analysis, or an error object
            when the input is empty/whitespace.
        """
        if not pipeline_content.strip():
            return json.dumps({"error": "No pipeline content provided"}, indent=2)
        # Basic syntax checks for demo — heuristic, not a real parser.
        validation_results = {
            "validation_status": "demo_mode",
            "pipeline_analysis": {
                # Treat explicit DSL2 enablement or a workflow block as DSL2.
                "dsl_version": "DSL2" if "nextflow.enable.dsl=2" in pipeline_content or "workflow {" in pipeline_content else "DSL1",
                "processes_found": pipeline_content.count("process "),
                "workflows_found": pipeline_content.count("workflow "),
                "includes_found": pipeline_content.count("include "),
                "line_count": len(pipeline_content.split('\n'))
            },
            "basic_checks": {
                "has_shebang": pipeline_content.startswith("#!/usr/bin/env nextflow"),
                "has_workflow_block": "workflow {" in pipeline_content,
                "has_process_definitions": "process " in pipeline_content,
                "uses_containers": "container " in pipeline_content or "docker" in pipeline_content,
            },
            "recommendations": [],
            "demo_note": "This is a syntax analysis demo. For full validation, use: nextflow config -check pipeline.nf"
        }
        # Add recommendations based on analysis
        if not validation_results["basic_checks"]["has_shebang"]:
            validation_results["recommendations"].append("Add shebang: #!/usr/bin/env nextflow")
        if not validation_results["basic_checks"]["uses_containers"]:
            validation_results["recommendations"].append("Consider using containers for reproducibility")
        if validation_results["pipeline_analysis"]["dsl_version"] == "DSL1":
            validation_results["recommendations"].append("Upgrade to DSL2 for better features")
        return json.dumps(validation_results, indent=2)

    def analyze_nextflow_log(self, log_content: str) -> str:
        """Mock log analysis for demo.

        Scans the log line-by-line for common failure signatures (errors,
        process failures, OOM kills) and detects a few execution patterns.

        Args:
            log_content: Raw Nextflow execution log text.

        Returns:
            A pretty-printed JSON string with issues, patterns and a summary,
            or an error object when the input is empty/whitespace.
        """
        if not log_content.strip():
            return json.dumps({"error": "No log content provided"}, indent=2)
        analysis = {
            "log_analysis": {
                "total_lines": len(log_content.split('\n')),
                "timestamp": "Demo analysis",
                "log_size_chars": len(log_content)
            },
            "issues_found": [],
            "patterns_detected": [],
            "performance_indicators": {},
            "recommendations": []
        }
        # Pattern matching for common issues.
        lines = log_content.split('\n')
        for line in lines:
            line_lower = line.lower()
            # BUGFIX: check the most specific pattern first. Real Nextflow OOM
            # lines read "terminated with an error exit status (137)" — they
            # contain "error"/"failed" and were previously swallowed by the
            # generic branches, so the OOM branch was unreachable. Also match
            # the parenthesized "(137)" form that Nextflow actually prints.
            if "exit status 137" in line_lower or "exit status (137)" in line_lower:
                analysis["issues_found"].append({
                    "type": "oom",
                    "line": line.strip(),
                    "pattern": "Out of memory (exit status 137)",
                    "suggestion": "Increase memory allocation or optimize data processing"
                })
            elif "error" in line_lower:
                analysis["issues_found"].append({
                    "type": "error",
                    "line": line.strip(),
                    "pattern": "Error detected",
                    "suggestion": "Review error details and check input parameters"
                })
            elif "failed" in line_lower:
                analysis["issues_found"].append({
                    "type": "failure",
                    "line": line.strip(),
                    "pattern": "Process failure",
                    "suggestion": "Check process resource requirements and inputs"
                })
        # Detect coarse, whole-log patterns.
        if "nextflow" in log_content.lower():
            analysis["patterns_detected"].append("Nextflow execution log")
        if "docker" in log_content.lower():
            analysis["patterns_detected"].append("Docker container usage")
        if "process >" in log_content:
            analysis["patterns_detected"].append("Process execution details")
        analysis["summary"] = f"Analyzed {len(lines)} lines, found {len(analysis['issues_found'])} potential issues"
        analysis["demo_note"] = "This is a pattern-based analysis demo. Full analysis requires log context."
        return json.dumps(analysis, indent=2)

    def get_documentation(self, doc_type: str) -> str:
        """Get sample documentation for demo.

        Args:
            doc_type: One of "nextflow", "viash", "docker",
                "spatial-workflows" or "server-status".

        Returns:
            The matching documentation text, or a fallback message for
            unknown keys.
        """
        docs = {
            "nextflow": """# Nextflow DSL2 Best Practices
## Overview
Nextflow enables scalable and reproducible scientific workflows using software containers.
## Essential DSL2 Patterns
### Basic Pipeline Structure
```nextflow
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
workflow {
input_ch = Channel.fromPath(params.input)
PROCESS_NAME(input_ch)
}
process PROCESS_NAME {
container 'biocontainers/tool:version'
input:
path input_file
output:
path "output.txt"
script:
\"\"\"
tool --input ${input_file} --output output.txt
\"\"\"
}
```
## Resource Management
- Always specify memory and CPU requirements
- Use dynamic resource allocation for variable workloads
- Implement retry strategies for robust execution
## OpenProblems Integration
- Follow OpenProblems naming conventions
- Use standardized input/output formats (h5ad)
- Include comprehensive metadata and documentation
""",
            "viash": """# Viash Component Development Guide
## Component Structure
Every Viash component consists of:
- config.vsh.yaml: Component configuration
- script.py/R: Core functionality implementation
- test.py/R: Unit tests
## Best Practices
- Keep components focused on single tasks
- Use descriptive parameter names and types
- Include comprehensive help documentation
- Implement proper error handling
- Follow semantic versioning
## OpenProblems Standards
- Use h5ad format for single-cell data
- Include spatial coordinates in obsm['spatial']
- Validate input data structure
- Generate standardized output formats
""",
            "docker": """# Docker Optimization for Bioinformatics
## Multi-stage Builds
Use multi-stage builds to reduce image size:
```dockerfile
FROM python:3.10-slim as builder
RUN pip install --user package
FROM python:3.10-slim
COPY --from=builder /root/.local /root/.local
```
## Bioinformatics-Specific Tips
- Use biocontainers as base images when available
- Pin specific versions for reproducibility
- Optimize layer caching for iterative development
- Use .dockerignore to exclude large data files
""",
            "spatial-workflows": """# Spatial Transcriptomics Pipeline Templates
## 1. Basic Preprocessing Pipeline
```nextflow
process SPATIAL_QC {
input: path spatial_data
output: path "qc_results.h5ad"
script:
\"\"\"
python qc_spatial.py --input ${spatial_data} --output qc_results.h5ad
\"\"\"
}
```
## 2. Spatially Variable Genes
```nextflow
process FIND_SVG {
input: path processed_data
output: path "svg_results.csv"
script:
\"\"\"
python spatial_variable_genes.py --input ${processed_data} --output svg_results.csv
\"\"\"
}
```
## 3. Label Transfer
```nextflow
process LABEL_TRANSFER {
input:
path query_data
path reference_data
output: path "annotated_data.h5ad"
script:
\"\"\"
python label_transfer.py --query ${query_data} --reference ${reference_data} --output annotated_data.h5ad
\"\"\"
}
```
""",
            # Pre-serialized so the UI textbox can display it directly.
            "server-status": json.dumps({
                "server_name": "OpenProblems Spatial Transcriptomics MCP",
                "version": "0.1.0",
                "status": "demo_mode",
                "environment": "Hugging Face Spaces",
                "capabilities": {
                    "nextflow_execution": "demo_mode",
                    "viash_components": "demo_mode",
                    "docker_builds": False,
                    "automated_testing": True,
                    "log_analysis": True,
                    "web_interface": True
                },
                "supported_formats": ["h5ad", "json", "yaml", "nf", "vsh.yaml"],
                "documentation_available": True,
                "demo_note": "This is a demonstration environment. Full functionality available in local deployment."
            }, indent=2)
        }
        return docs.get(doc_type, f"Documentation for {doc_type} not available in demo mode.")
def create_spatial_mcp_demo():
    """Create the HF Spaces demo interface.

    Builds a Gradio Blocks app that wires a MockMCPServer instance into
    four interactive tool tabs (environment check, pipeline validation,
    log analysis, documentation) plus a static AI-agent integration guide.

    Returns:
        The assembled (not yet launched) gr.Blocks application.
    """
    mcp = MockMCPServer()
    # Custom CSS for better appearance
    css = """
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.demo-header {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
padding: 20px;
border-radius: 10px;
margin-bottom: 20px;
}
.tool-section {
border: 1px solid #e0e0e0;
border-radius: 8px;
padding: 20px;
margin: 10px 0;
background: #fafafa;
}
.success { color: #28a745; }
.warning { color: #ffc107; }
.error { color: #dc3545; }
"""
    with gr.Blocks(
        title="OpenProblems Spatial Transcriptomics MCP Server Demo",
        theme=gr.themes.Soft(),
        css=css
    ) as demo:
        # Header banner card (styled by .demo-header above).
        gr.HTML("""
<div class="demo-header">
<h1>𧬠OpenProblems Spatial Transcriptomics MCP Server</h1>
<h3>Interactive Demo - Model Context Protocol for AI-Powered Bioinformatics</h3>
<p>π This demo showcases the MCP server that enables AI agents like Continue.dev to automate spatial transcriptomics workflows</p>
</div>
""")
        gr.Markdown("""
## π― What is this?
This is a **Model Context Protocol (MCP) server** designed for spatial transcriptomics research. It provides:
- **11 specialized tools** for workflow automation
- **5 knowledge resources** with curated documentation
- **AI agent integration** for Continue.dev and other MCP-compatible tools
- **Production deployment** via Docker and local installation
> **Note**: This is a demo environment. For full functionality with Nextflow, Viash, and Docker, deploy locally.
""")
        with gr.Tabs():
            # Environment Check Tab
            with gr.Tab("π§ Environment Validation"):
                gr.Markdown("### Check Bioinformatics Environment")
                gr.Markdown("*Verify that required tools are installed and configured properly.*")
                with gr.Row():
                    tools_input = gr.Textbox(
                        value="nextflow,viash,docker,java",
                        label="Tools to Check",
                        placeholder="Comma-separated list: nextflow,viash,docker,java",
                        info="Enter tools to validate in your environment"
                    )
                    check_btn = gr.Button("π Check Environment", variant="primary")
                env_output = gr.JSON(
                    label="Environment Check Results",
                    show_label=True
                )
                # Button -> mock tool: takes the textbox value, fills the JSON view.
                check_btn.click(mcp.check_environment, tools_input, env_output)
                gr.Markdown("""
**π‘ What this tool does:**
- Validates bioinformatics tool installations
- Checks version compatibility
- Provides installation recommendations
- Assesses environment readiness for spatial workflows
""")
            # Pipeline Validation Tab
            with gr.Tab("β‘ Pipeline Validation"):
                gr.Markdown("### Nextflow Pipeline Syntax Analysis")
                gr.Markdown("*Analyze Nextflow DSL2 pipelines for syntax and best practices.*")
                # Pre-filled with a representative spatial-QC DSL2 pipeline so the
                # demo works with a single click.
                pipeline_input = gr.Textbox(
                    label="Nextflow Pipeline Code",
                    value="""#!/usr/bin/env nextflow
nextflow.enable.dsl=2
workflow {
input_ch = Channel.fromPath(params.input)
SPATIAL_QC(input_ch)
}
process SPATIAL_QC {
container 'biocontainers/scanpy:1.9.1'
input:
path spatial_data
output:
path "qc_results.h5ad"
script:
'''
python -c "
import scanpy as sc
import squidpy as sq
adata = sc.read_h5ad('${spatial_data}')
# Quality control analysis
sc.pp.calculate_qc_metrics(adata)
adata.write('qc_results.h5ad')
"
'''
}""",
                    lines=20,
                    placeholder="Paste your Nextflow pipeline code here..."
                )
                validate_btn = gr.Button("π Validate Pipeline", variant="primary")
                validation_output = gr.JSON(label="Validation Results")
                validate_btn.click(mcp.validate_nextflow_config, pipeline_input, validation_output)
                gr.Markdown("""
**π‘ What this tool does:**
- Analyzes DSL2 syntax and structure
- Checks for best practices compliance
- Identifies potential issues and improvements
- Validates container usage and resource specifications
""")
            # Log Analysis Tab
            with gr.Tab("π Log Analysis"):
                gr.Markdown("### Nextflow Execution Log Analysis")
                gr.Markdown("*AI-powered analysis of pipeline execution logs to identify issues and optimization opportunities.*")
                # Pre-filled with a sample log containing an OOM failure
                # (exit status 137) so the analyzer has something to find.
                log_input = gr.Textbox(
                    label="Nextflow Log Content",
                    value="""N E X T F L O W ~ version 23.04.0
Launching `main.nf` [abc123] DSL2 - revision: def456
executor > local (4)
[12/abc123] process > SPATIAL_QC [100%] 2 of 2 β
[34/def456] process > FIND_SVG [ 50%] 1 of 2, failed: 1 β
ERROR ~ Error executing process > 'FIND_SVG'
Caused by:
Process `FIND_SVG` terminated with an error exit status (137)
Command executed:
python spatial_variable_genes.py --input data.h5ad --output svg_results.csv
Command exit status:
137
Work dir:
/work/34/def456...
Tip: you can replicate the issue by changing to the process work dir and entering the command shown above""",
                    lines=15,
                    placeholder="Paste Nextflow execution logs here..."
                )
                analyze_btn = gr.Button("π Analyze Log", variant="primary")
                log_output = gr.JSON(label="Log Analysis Results")
                analyze_btn.click(mcp.analyze_nextflow_log, log_input, log_output)
                gr.Markdown("""
**π‘ What this tool does:**
- Identifies common execution errors and failures
- Detects out-of-memory issues (exit status 137)
- Provides specific troubleshooting recommendations
- Analyzes performance patterns and bottlenecks
""")
            # Documentation Tab
            with gr.Tab("π Knowledge Resources"):
                gr.Markdown("### Access Curated Documentation")
                gr.Markdown("*Browse comprehensive documentation and templates for spatial transcriptomics workflows.*")
                # Dropdown choices are (display label, value) pairs; the value is
                # the key passed to MockMCPServer.get_documentation.
                doc_type = gr.Dropdown(
                    choices=[
                        ("Nextflow Best Practices", "nextflow"),
                        ("Viash Component Development", "viash"),
                        ("Docker Optimization", "docker"),
                        ("Spatial Workflow Templates", "spatial-workflows"),
                        ("Server Status", "server-status")
                    ],
                    value="nextflow",
                    label="Documentation Type",
                    info="Select documentation category to explore"
                )
                doc_btn = gr.Button("π Get Documentation", variant="primary")
                doc_output = gr.Textbox(
                    label="Documentation Content",
                    lines=20,
                    max_lines=30
                )
                doc_btn.click(mcp.get_documentation, doc_type, doc_output)
                gr.Markdown("""
**π‘ Available Resources:**
- **Nextflow**: DSL2 patterns, resource management, OpenProblems integration
- **Viash**: Component structure, best practices, testing guidelines
- **Docker**: Multi-stage builds, bioinformatics optimization
- **Spatial Templates**: Ready-to-use pipeline examples
- **Server Status**: Current capabilities and configuration
""")
            # MCP Integration Tab (static documentation, no callbacks).
            with gr.Tab("π€ AI Agent Integration"):
                gr.Markdown("### Connect with Continue.dev and Other AI Agents")
                gr.Markdown("""
## π Local Installation & Integration
To use this MCP server with AI agents like Continue.dev:
### 1. Install the MCP Server
```bash
git clone https://github.com/openproblems-bio/SpatialAI_MCP.git
cd SpatialAI_MCP
pip install -e .
```
### 2. Configure Continue.dev
Add this to your `~/.continue/config.json`:
```json
{
"experimental": {
"modelContextProtocolServers": [
{
"name": "openproblems-spatial",
"transport": {
"type": "stdio",
"command": "python",
"args": ["-m", "mcp_server.main"],
"cwd": "/path/to/your/SpatialAI_MCP"
}
}
]
}
}
```
### 3. Test the Integration
Ask your AI agent: *"Check my spatial transcriptomics environment and help me create a quality control pipeline"*
## π οΈ Available MCP Tools
""")
                # Display tools information, generated from the mock server's
                # catalogue so the UI stays in sync with the class definition.
                tools_info = []
                for tool, desc in mcp.tools_info.items():
                    tools_info.append(f"β’ **{tool}**: {desc}")
                gr.Markdown("### Tools (11 available):\n" + "\n".join(tools_info))
                # Display resources information (same pattern as tools above).
                resources_info = []
                for resource, desc in mcp.resources_info.items():
                    resources_info.append(f"β’ **{resource}**: {desc}")
                gr.Markdown("### Resources (5 available):\n" + "\n".join(resources_info))
                gr.Markdown("""
## π― Example AI Agent Interactions
**User**: *"Help me set up spatial transcriptomics quality control"*
**AI Agent Response**:
```
I'll help you create a comprehensive spatial QC pipeline. Let me first assess your environment.
[Uses check_environment tool]
β Docker: Available (version 28.1.1)
β Nextflow: Not found
β Viash: Not found
[Uses list_directory tool]
Found spatial data in: data/spatial_samples/
Existing configs: config/
Based on OpenProblems best practices, I'll:
1. Install missing dependencies
2. Create a modular QC pipeline
3. Generate Viash components
4. Set up comprehensive testing
[Creates optimized pipeline with proper error handling and documentation]
```
## π Additional Resources
- **[Setup Guide](https://github.com/openproblems-bio/SpatialAI_MCP/blob/main/docs/CONTINUE_DEV_SETUP.md)**: Complete integration instructions
- **[Agent Rules](https://github.com/openproblems-bio/SpatialAI_MCP/blob/main/docs/AGENT_RULES.md)**: Best practices for AI agents
- **[Docker Deployment](https://github.com/openproblems-bio/SpatialAI_MCP/blob/main/docker/)**: Production deployment options
""")
        # Footer shown below the tab strip.
        gr.Markdown("""
---
## π Try It Yourself!
1. **Explore the tools** above to see MCP capabilities in action
2. **Install locally** for full Nextflow/Viash/Docker integration
3. **Connect with Continue.dev** for AI-powered spatial transcriptomics workflows
**π Links**:
[GitHub Repository](https://github.com/openproblems-bio/SpatialAI_MCP) |
[OpenProblems Project](https://openproblems.bio) |
[Model Context Protocol](https://modelcontextprotocol.io)
*Transforming spatial transcriptomics research through AI-powered workflow automation.* π§¬β¨
""")
    return demo
# For HF Spaces deployment
if __name__ == "__main__":
    demo = create_spatial_mcp_demo()
    # Bind on all interfaces at port 7860 — the port HF Spaces expects
    # a Gradio app to listen on.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,  # surface Python tracebacks in the UI for easier debugging
        share=False  # HF Spaces handles sharing
    )