Spaces:

SEUyishu
/

MatDeepLearn

Sleeping

App Files Files Community

SEUyishu committed on Dec 3, 2025

Commit

6f5c6b4

verified ·

1 Parent(s): aa61e51

Update mcp_output/mcp_plugin/mcp_service.py

Browse files

Files changed (1) hide show

mcp_output/mcp_plugin/mcp_service.py +894 -640

mcp_output/mcp_plugin/mcp_service.py CHANGED Viewed

@@ -1,640 +1,894 @@
-"""
-MatDeepLearn MCP Service
-A Model Context Protocol service for materials property prediction using Graph Neural Networks.
-"""
-import os
-import sys
-import json
-import tempfile
-import yaml
-import numpy as np
-from typing import Optional, List, Dict, Any
-from pathlib import Path
-# Add MatDeepLearn to path
-project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-if project_root not in sys.path:
-    sys.path.insert(0, project_root)
-from fastmcp import FastMCP
-# Import MatDeepLearn modules
-try:
-    import torch
-    from matdeeplearn import models, process, training
-    from matdeeplearn.models.utils import model_summary
-    MATDEEPLEARN_AVAILABLE = True
-except ImportError as e:
-    MATDEEPLEARN_AVAILABLE = False
-    IMPORT_ERROR = str(e)
-mcp = FastMCP("matdeeplearn_service")
-@mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
-def check_environment() -> dict:
-    """
-    Check if the MatDeepLearn environment is properly configured.
-    Returns:
-        dict: Contains environment status including GPU availability.
-    """
-    try:
-        if not MATDEEPLEARN_AVAILABLE:
-            return {
-                "success": False,
-                "error": f"MatDeepLearn not available: {IMPORT_ERROR}"
-            }
-        gpu_available = torch.cuda.is_available()
-        gpu_count = torch.cuda.device_count() if gpu_available else 0
-        gpu_name = torch.cuda.get_device_name(0) if gpu_available else "N/A"
-        return {
-            "success": True,
-            "matdeeplearn_available": True,
-            "torch_version": torch.__version__,
-            "gpu_available": gpu_available,
-            "gpu_count": gpu_count,
-            "gpu_name": gpu_name,
-            "available_models": [
-                "CGCNN_demo", "MPNN_demo", "SchNet_demo",
-                "MEGNet_demo", "GCN_demo", "SOAP_demo", "SM_demo"
-            ]
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="list_available_models", description="List all available GNN models in MatDeepLearn.")
-def list_available_models() -> dict:
-    """
-    List all available Graph Neural Network models.
-    Returns:
-        dict: Contains list of available models with descriptions.
-    """
-    try:
-        models_info = {
-            "CGCNN_demo": {
-                "name": "Crystal Graph Convolutional Neural Network",
-                "description": "A GNN for predicting material properties using crystal graphs.",
-                "paper": "Xie & Grossman, PRL 2018"
-            },
-            "MPNN_demo": {
-                "name": "Message Passing Neural Network",
-                "description": "General message passing framework for molecular graphs.",
-                "paper": "Gilmer et al., ICML 2017"
-            },
-            "SchNet_demo": {
-                "name": "SchNet",
-                "description": "Continuous-filter convolutional neural network for modeling quantum interactions.",
-                "paper": "Schütt et al., JCP 2017"
-            },
-            "MEGNet_demo": {
-                "name": "MatErials Graph Network",
-                "description": "Graph network with global state for materials property prediction.",
-                "paper": "Chen et al., Chem. Mater. 2019"
-            },
-            "GCN_demo": {
-                "name": "Graph Convolutional Network",
-                "description": "Standard graph convolutional network architecture.",
-                "paper": "Kipf & Welling, ICLR 2017"
-            },
-            "SOAP_demo": {
-                "name": "Smooth Overlap of Atomic Positions",
-                "description": "Descriptor-based method using SOAP features.",
-                "paper": "Bartók et al., PRB 2013"
-            },
-            "SM_demo": {
-                "name": "Sine Matrix",
-                "description": "Descriptor-based method using Sine/Coulomb matrix features.",
-                "paper": "Various"
-            }
-        }
-        return {
-            "success": True,
-            "models": models_info,
-            "total_models": len(models_info)
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="get_model_config", description="Get the default configuration for a specific model.")
-def get_model_config(model_name: str) -> dict:
-    """
-    Get the default configuration for a specific GNN model.
-    Parameters:
-        model_name (str): Name of the model (e.g., 'CGCNN_demo', 'SchNet_demo').
-    Returns:
-        dict: Contains the default configuration for the model.
-    """
-    try:
-        config_path = os.path.join(project_root, "config.yml")
-        if not os.path.exists(config_path):
-            return {"success": False, "error": "Config file not found"}
-        with open(config_path, "r") as f:
-            config = yaml.load(f, Loader=yaml.FullLoader)
-        if model_name not in config.get("Models", {}):
-            return {
-                "success": False,
-                "error": f"Model '{model_name}' not found. Available models: {list(config.get('Models', {}).keys())}"
-            }
-        model_config = config["Models"][model_name]
-        processing_config = config.get("Processing", {})
-        training_config = config.get("Training", {})
-        return {
-            "success": True,
-            "model_name": model_name,
-            "model_config": model_config,
-            "processing_config": processing_config,
-            "training_config": training_config
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="process_structure_data", description="Process atomic structure data into graph format for GNN training.")
-def process_structure_data(
-    data_path: str,
-    target_index: int = 0,
-    graph_max_radius: float = 8.0,
-    graph_max_neighbors: int = 12,
-    reprocess: bool = False
-) -> dict:
-    """
-    Process atomic structure data into graph format.
-    Parameters:
-        data_path (str): Path to directory containing structure files and targets.csv.
-        target_index (int): Index of target column in targets.csv (default: 0).
-        graph_max_radius (float): Maximum radius for edges in graph (default: 8.0).
-        graph_max_neighbors (int): Maximum number of neighbors per atom (default: 12).
-        reprocess (bool): Whether to reprocess data even if processed files exist.
-    Returns:
-        dict: Contains processing status and dataset information.
-    """
-    try:
-        if not MATDEEPLEARN_AVAILABLE:
-            return {"success": False, "error": "MatDeepLearn not available"}
-        if not os.path.exists(data_path):
-            return {"success": False, "error": f"Data path not found: {data_path}"}
-        processing_args = {
-            "dataset_type": "inmemory",
-            "data_path": data_path,
-            "target_path": "targets.csv",
-            "dictionary_source": "default",
-            "dictionary_path": "atom_dict.json",
-            "data_format": "json",
-            "verbose": "True",
-            "graph_max_radius": graph_max_radius,
-            "graph_max_neighbors": graph_max_neighbors,
-            "voronoi": "False",
-            "edge_features": "True",
-            "graph_edge_length": 50,
-            "SM_descriptor": "False",
-            "SOAP_descriptor": "False"
-        }
-        dataset = process.get_dataset(
-            data_path,
-            target_index,
-            "True" if reprocess else "False",
-            processing_args
-        )
-        return {
-            "success": True,
-            "dataset_size": len(dataset),
-            "sample_data": {
-                "num_nodes": dataset[0].x.shape[0] if len(dataset) > 0 else 0,
-                "num_node_features": dataset[0].x.shape[1] if len(dataset) > 0 else 0,
-                "num_edges": dataset[0].edge_index.shape[1] if len(dataset) > 0 else 0
-            },
-            "data_path": data_path
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="train_model", description="Train a GNN model on processed structure data.")
-def train_model(
-    data_path: str,
-    model_name: str = "CGCNN_demo",
-    epochs: int = 100,
-    batch_size: int = 32,
-    learning_rate: float = 0.002,
-    train_ratio: float = 0.8,
-    val_ratio: float = 0.1,
-    test_ratio: float = 0.1,
-    save_model: bool = True,
-    model_path: str = "trained_model.pth"
-) -> dict:
-    """
-    Train a GNN model on processed structure data.
-    Parameters:
-        data_path (str): Path to directory containing processed structure data.
-        model_name (str): Name of the model to train (default: 'CGCNN_demo').
-        epochs (int): Number of training epochs (default: 100).
-        batch_size (int): Training batch size (default: 32).
-        learning_rate (float): Learning rate (default: 0.002).
-        train_ratio (float): Ratio of data for training (default: 0.8).
-        val_ratio (float): Ratio of data for validation (default: 0.1).
-        test_ratio (float): Ratio of data for testing (default: 0.1).
-        save_model (bool): Whether to save the trained model (default: True).
-        model_path (str): Path to save the trained model (default: 'trained_model.pth').
-    Returns:
-        dict: Contains training results including train/val/test errors.
-    """
-    try:
-        if not MATDEEPLEARN_AVAILABLE:
-            return {"success": False, "error": "MatDeepLearn not available"}
-        if not os.path.exists(data_path):
-            return {"success": False, "error": f"Data path not found: {data_path}"}
-        # Load default config
-        config_path = os.path.join(project_root, "config.yml")
-        with open(config_path, "r") as f:
-            config = yaml.load(f, Loader=yaml.FullLoader)
-        if model_name not in config.get("Models", {}):
-            return {"success": False, "error": f"Model '{model_name}' not found"}
-        # Prepare configuration
-        job_config = {
-            "job_name": "mcp_train_job",
-            "reprocess": "False",
-            "model": model_name,
-            "load_model": "False",
-            "save_model": "True" if save_model else "False",
-            "model_path": model_path,
-            "write_output": "True",
-            "parallel": "False",
-            "seed": np.random.randint(1, 1e6)
-        }
-        training_config = {
-            "target_index": 0,
-            "loss": "l1_loss",
-            "train_ratio": train_ratio,
-            "val_ratio": val_ratio,
-            "test_ratio": test_ratio,
-            "verbosity": 5
-        }
-        model_config = config["Models"][model_name].copy()
-        model_config["epochs"] = epochs
-        model_config["batch_size"] = batch_size
-        model_config["lr"] = learning_rate
-        # Determine device
-        world_size = torch.cuda.device_count()
-        if world_size == 0:
-            rank = "cpu"
-        else:
-            rank = "cuda"
-        # Train model
-        error_values = training.train_regular(
-            rank,
-            world_size,
-            data_path,
-            job_config,
-            training_config,
-            model_config
-        )
-        return {
-            "success": True,
-            "model_name": model_name,
-            "epochs": epochs,
-            "train_error": float(error_values[0]) if error_values is not None else None,
-            "val_error": float(error_values[1]) if error_values is not None else None,
-            "test_error": float(error_values[2]) if error_values is not None else None,
-            "model_saved": save_model,
-            "model_path": model_path if save_model else None
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="predict_properties", description="Use a trained model to predict properties of new structures.")
-def predict_properties(
-    data_path: str,
-    model_path: str,
-    target_index: int = 0
-) -> dict:
-    """
-    Use a trained model to predict properties of new structures.
-    Parameters:
-        data_path (str): Path to directory containing structure files to predict.
-        model_path (str): Path to the trained model file (.pth).
-        target_index (int): Index of target column (default: 0).
-    Returns:
-        dict: Contains predictions and error metrics.
-    """
-    try:
-        if not MATDEEPLEARN_AVAILABLE:
-            return {"success": False, "error": "MatDeepLearn not available"}
-        if not os.path.exists(data_path):
-            return {"success": False, "error": f"Data path not found: {data_path}"}
-        if not os.path.exists(model_path):
-            return {"success": False, "error": f"Model file not found: {model_path}"}
-        # Get dataset
-        dataset = process.get_dataset(data_path, target_index, "False")
-        job_config = {
-            "job_name": "mcp_predict_job",
-            "model_path": model_path,
-            "write_output": "True"
-        }
-        # Run prediction
-        test_error = training.predict(dataset, "l1_loss", job_config)
-        return {
-            "success": True,
-            "dataset_size": len(dataset),
-            "test_error": float(test_error),
-            "output_file": "mcp_predict_job_predicted_outputs.csv"
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="cross_validation", description="Perform k-fold cross validation on a dataset.")
-def cross_validation(
-    data_path: str,
-    model_name: str = "CGCNN_demo",
-    cv_folds: int = 5,
-    epochs: int = 100
-) -> dict:
-    """
-    Perform k-fold cross validation on a dataset.
-    Parameters:
-        data_path (str): Path to directory containing structure data.
-        model_name (str): Name of the model to use (default: 'CGCNN_demo').
-        cv_folds (int): Number of cross-validation folds (default: 5).
-        epochs (int): Number of training epochs per fold (default: 100).
-    Returns:
-        dict: Contains cross-validation results.
-    """
-    try:
-        if not MATDEEPLEARN_AVAILABLE:
-            return {"success": False, "error": "MatDeepLearn not available"}
-        if not os.path.exists(data_path):
-            return {"success": False, "error": f"Data path not found: {data_path}"}
-        # Load config
-        config_path = os.path.join(project_root, "config.yml")
-        with open(config_path, "r") as f:
-            config = yaml.load(f, Loader=yaml.FullLoader)
-        if model_name not in config.get("Models", {}):
-            return {"success": False, "error": f"Model '{model_name}' not found"}
-        job_config = {
-            "job_name": "mcp_cv_job",
-            "reprocess": "False",
-            "model": model_name,
-            "cv_folds": cv_folds,
-            "write_output": "True",
-            "parallel": "False",
-            "seed": np.random.randint(1, 1e6)
-        }
-        training_config = {
-            "target_index": 0,
-            "loss": "l1_loss",
-            "verbosity": 5
-        }
-        model_config = config["Models"][model_name].copy()
-        model_config["epochs"] = epochs
-        world_size = torch.cuda.device_count()
-        rank = "cpu" if world_size == 0 else "cuda"
-        cv_error = training.train_CV(
-            rank,
-            world_size,
-            data_path,
-            job_config,
-            training_config,
-            model_config
-        )
-        return {
-            "success": True,
-            "model_name": model_name,
-            "cv_folds": cv_folds,
-            "cv_error": float(cv_error) if cv_error is not None else None,
-            "output_file": "mcp_cv_job_CV_outputs.csv"
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="analyze_structure", description="Analyze the structure of atomic data and convert to graph representation info.")
-def analyze_structure(structure_file: str) -> dict:
-    """
-    Analyze the structure of an atomic structure file.
-    Parameters:
-        structure_file (str): Path to a structure file (json, cif, xyz, POSCAR, etc.).
-    Returns:
-        dict: Contains structure analysis including atoms, bonds, and graph info.
-    """
-    try:
-        if not os.path.exists(structure_file):
-            return {"success": False, "error": f"Structure file not found: {structure_file}"}
-        import ase
-        from ase import io
-        # Read structure
-        structure = ase.io.read(structure_file)
-        # Get basic info
-        symbols = structure.get_chemical_symbols()
-        positions = structure.get_positions().tolist()
-        cell = structure.get_cell().tolist() if any(structure.pbc) else None
-        pbc = structure.pbc.tolist()
-        # Get distance matrix
-        distance_matrix = structure.get_all_distances(mic=True)
-        # Analyze connectivity
-        cutoff_radius = 8.0
-        neighbors_count = []
-        for i in range(len(structure)):
-            neighbors = np.sum((distance_matrix[i] > 0) & (distance_matrix[i] < cutoff_radius))
-            neighbors_count.append(int(neighbors))
-        return {
-            "success": True,
-            "num_atoms": len(structure),
-            "chemical_formula": structure.get_chemical_formula(),
-            "elements": list(set(symbols)),
-            "element_counts": {elem: symbols.count(elem) for elem in set(symbols)},
-            "has_periodicity": any(pbc),
-            "pbc": pbc,
-            "cell": cell,
-            "average_neighbors": float(np.mean(neighbors_count)),
-            "min_neighbors": min(neighbors_count),
-            "max_neighbors": max(neighbors_count),
-            "min_distance": float(distance_matrix[distance_matrix > 0].min()),
-            "max_distance": float(distance_matrix.max())
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="compare_models", description="Compare performance of different GNN models on a dataset.")
-def compare_models(
-    data_path: str,
-    model_list: List[str] = None,
-    epochs: int = 50
-) -> dict:
-    """
-    Compare performance of different GNN models on a dataset.
-    Parameters:
-        data_path (str): Path to directory containing structure data.
-        model_list (List[str]): List of models to compare (default: all available).
-        epochs (int): Number of training epochs per model (default: 50).
-    Returns:
-        dict: Contains comparison results for each model.
-    """
-    try:
-        if not MATDEEPLEARN_AVAILABLE:
-            return {"success": False, "error": "MatDeepLearn not available"}
-        if not os.path.exists(data_path):
-            return {"success": False, "error": f"Data path not found: {data_path}"}
-        if model_list is None:
-            model_list = ["CGCNN_demo", "GCN_demo", "SchNet_demo"]
-        results = {}
-        for model_name in model_list:
-            try:
-                result = train_model(
-                    data_path=data_path,
-                    model_name=model_name,
-                    epochs=epochs,
-                    save_model=False
-                )
-                if result["success"]:
-                    results[model_name] = {
-                        "train_error": result["train_error"],
-                        "val_error": result["val_error"],
-                        "test_error": result["test_error"]
-                    }
-                else:
-                    results[model_name] = {"error": result["error"]}
-            except Exception as e:
-                results[model_name] = {"error": str(e)}
-        # Find best model
-        best_model = None
-        best_error = float("inf")
-        for model, res in results.items():
-            if "test_error" in res and res["test_error"] is not None:
-                if res["test_error"] < best_error:
-                    best_error = res["test_error"]
-                    best_model = model
-        return {
-            "success": True,
-            "results": results,
-            "best_model": best_model,
-            "best_test_error": best_error if best_model else None
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="get_dataset_info", description="Get information about a dataset directory.")
-def get_dataset_info(data_path: str) -> dict:
-    """
-    Get information about a dataset directory.
-    Parameters:
-        data_path (str): Path to directory containing structure data.
-    Returns:
-        dict: Contains dataset information including file counts and formats.
-    """
-    try:
-        if not os.path.exists(data_path):
-            return {"success": False, "error": f"Data path not found: {data_path}"}
-        # Count files by extension
-        extensions = {}
-        for file in os.listdir(data_path):
-            ext = os.path.splitext(file)[1].lower()
-            extensions[ext] = extensions.get(ext, 0) + 1
-        # Check for required files
-        has_targets = os.path.exists(os.path.join(data_path, "targets.csv"))
-        has_atom_dict = os.path.exists(os.path.join(data_path, "atom_dict.json"))
-        has_processed = os.path.exists(os.path.join(data_path, "processed"))
-        # Read targets if available
-        num_samples = 0
-        if has_targets:
-            import csv
-            with open(os.path.join(data_path, "targets.csv")) as f:
-                num_samples = sum(1 for _ in csv.reader(f))
-        return {
-            "success": True,
-            "data_path": data_path,
-            "file_extensions": extensions,
-            "has_targets_csv": has_targets,
-            "has_atom_dict": has_atom_dict,
-            "has_processed_data": has_processed,
-            "num_samples": num_samples,
-            "ready_for_training": has_targets
-        }
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-def create_app() -> FastMCP:
-    """
-    Creates and returns the FastMCP application instance.
-    Returns:
-        FastMCP: The FastMCP application instance.
-    """
-    return mcp

+"""
+MatDeepLearn MCP Service
+A Model Context Protocol service for materials property prediction using Graph Neural Networks.
+"""
+import os
+import sys
+import json
+import tempfile
+import yaml
+import numpy as np
+from typing import Optional, List, Dict, Any
+from pathlib import Path
+# Add MatDeepLearn to path
+# project_root is three directory levels above this file; it is also used
+# further down to locate config.yml.
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if project_root not in sys.path:
+    sys.path.insert(0, project_root)
+from fastmcp import FastMCP
+# Import MatDeepLearn modules
+# The import is guarded so this module still loads when torch or matdeeplearn
+# is missing; the tools consult MATDEEPLEARN_AVAILABLE before using them.
+try:
+    import torch
+    from matdeeplearn import models, process, training
+    from matdeeplearn.models.utils import model_summary
+    MATDEEPLEARN_AVAILABLE = True
+except ImportError as e:
+    MATDEEPLEARN_AVAILABLE = False
+    # Keep the message as a string: IMPORT_ERROR is only defined on this
+    # failure path and is read only when MATDEEPLEARN_AVAILABLE is False.
+    IMPORT_ERROR = str(e)
+# Single FastMCP instance that every @mcp.tool function below registers with.
+mcp = FastMCP("matdeeplearn_service")
+@mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
+def check_environment() -> dict:
+    """
+    Report whether MatDeepLearn could be imported and what CUDA hardware torch sees.
+    Returns:
+        dict: On success, the torch version, GPU availability/count/name and the
+              model names this service advertises. When the guarded import failed,
+              or any call below raises, {"success": False, "error": <message>}.
+    """
+    try:
+        if not MATDEEPLEARN_AVAILABLE:
+            failure = {"success": False}
+            failure["error"] = f"MatDeepLearn not available: {IMPORT_ERROR}"
+            return failure
+        has_cuda = torch.cuda.is_available()
+        # Only query device details when CUDA is usable; otherwise report placeholders.
+        if has_cuda:
+            device_total = torch.cuda.device_count()
+            device_label = torch.cuda.get_device_name(0)
+        else:
+            device_total = 0
+            device_label = "N/A"
+        advertised_models = [
+            "CGCNN_demo",
+            "MPNN_demo",
+            "SchNet_demo",
+            "MEGNet_demo",
+            "GCN_demo",
+            "SOAP_demo",
+            "SM_demo",
+        ]
+        return {
+            "success": True,
+            "matdeeplearn_available": True,
+            "torch_version": torch.__version__,
+            "gpu_available": has_cuda,
+            "gpu_count": device_total,
+            "gpu_name": device_label,
+            "available_models": advertised_models,
+        }
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
+@mcp.tool(name="list_available_models", description="List all available GNN models in MatDeepLearn.")
+def list_available_models() -> dict:
+    """
+    Return the static catalogue of models this service knows about.
+    Returns:
+        dict: {"success": True, "models": {<model key>: {"name", "description", "paper"}},
+              "total_models": <count>}, or {"success": False, "error": <message>} on failure.
+    """
+    try:
+        # One row per model: (config key, full name, short description, reference).
+        catalogue = (
+            ("CGCNN_demo", "Crystal Graph Convolutional Neural Network",
+             "A GNN for predicting material properties using crystal graphs.",
+             "Xie & Grossman, PRL 2018"),
+            ("MPNN_demo", "Message Passing Neural Network",
+             "General message passing framework for molecular graphs.",
+             "Gilmer et al., ICML 2017"),
+            ("SchNet_demo", "SchNet",
+             "Continuous-filter convolutional neural network for modeling quantum interactions.",
+             "Schütt et al., JCP 2017"),
+            ("MEGNet_demo", "MatErials Graph Network",
+             "Graph network with global state for materials property prediction.",
+             "Chen et al., Chem. Mater. 2019"),
+            ("GCN_demo", "Graph Convolutional Network",
+             "Standard graph convolutional network architecture.",
+             "Kipf & Welling, ICLR 2017"),
+            ("SOAP_demo", "Smooth Overlap of Atomic Positions",
+             "Descriptor-based method using SOAP features.",
+             "Bartók et al., PRB 2013"),
+            ("SM_demo", "Sine Matrix",
+             "Descriptor-based method using Sine/Coulomb matrix features.",
+             "Various"),
+        )
+        models_info = {
+            key: {"name": title, "description": blurb, "paper": citation}
+            for key, title, blurb, citation in catalogue
+        }
+        return {
+            "success": True,
+            "models": models_info,
+            "total_models": len(models_info),
+        }
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
+@mcp.tool(name="get_model_config", description="Get the default configuration for a specific model.")
+def get_model_config(model_name: str) -> dict:
+    """
+    Look up one model's default settings in the project's config.yml.
+    Parameters:
+        model_name (str): Key under the "Models" section (e.g., 'CGCNN_demo', 'SchNet_demo').
+    Returns:
+        dict: The model's section plus the "Processing" and "Training" sections
+              (empty dicts when absent), or {"success": False, "error": <message>}
+              when the file or the model key is missing or loading fails.
+    """
+    try:
+        config_path = os.path.join(project_root, "config.yml")
+        if not os.path.exists(config_path):
+            return {"success": False, "error": "Config file not found"}
+        # NOTE(review): FullLoader is applied to the project's own config.yml;
+        # revisit if this path can ever point at client-supplied content.
+        with open(config_path, "r") as handle:
+            config = yaml.load(handle, Loader=yaml.FullLoader)
+        known_models = config.get("Models", {})
+        if model_name not in known_models:
+            message = f"Model '{model_name}' not found. Available models: {list(known_models.keys())}"
+            return {"success": False, "error": message}
+        response = {"success": True, "model_name": model_name}
+        response["model_config"] = known_models[model_name]
+        response["processing_config"] = config.get("Processing", {})
+        response["training_config"] = config.get("Training", {})
+        return response
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
+def _write_uploaded_dataset(structure_contents: Dict[str, str], targets_csv: str) -> str:
+    """
+    Write client-supplied structure files and targets.csv into a new temporary directory.
+    Parameters:
+        structure_contents (dict): Mapping of bare filenames to file contents.
+        targets_csv (str): Content written to targets.csv inside the directory.
+    Returns:
+        str: Path of the created temporary directory.
+    Raises:
+        ValueError: If a filename is not a string, is empty, is '.' or '..', or
+                    contains a directory component.
+    """
+    import shutil
+    # Filenames come from the MCP client. Validate all of them before creating
+    # anything so names like "../x" or "/abs/path" cannot escape the temp directory.
+    for filename in structure_contents:
+        if (
+            not isinstance(filename, str)
+            or filename in ("", ".", "..")
+            or os.path.basename(filename) != filename
+        ):
+            raise ValueError(f"Invalid structure filename: {filename!r}")
+    temp_dir = tempfile.mkdtemp(prefix="mcp_data_")
+    try:
+        for filename, content in structure_contents.items():
+            # Explicit encoding so the written bytes do not depend on the server locale.
+            with open(os.path.join(temp_dir, filename), "w", encoding="utf-8") as f:
+                f.write(content)
+        # Written last, so an uploaded file that happens to be named targets.csv
+        # is replaced by the explicit targets_csv argument.
+        with open(os.path.join(temp_dir, "targets.csv"), "w", encoding="utf-8") as f:
+            f.write(targets_csv)
+    except Exception:
+        # Do not leave a half-written directory behind.
+        shutil.rmtree(temp_dir, ignore_errors=True)
+        raise
+    return temp_dir
+@mcp.tool(name="process_structure_data", description="Process atomic structure data into graph format for GNN training. Provide either data_path (server path) or structure_contents (direct file contents).")
+def process_structure_data(
+    data_path: Optional[str] = None,
+    structure_contents: Optional[Dict[str, str]] = None,
+    targets_csv: Optional[str] = None,
+    target_index: int = 0,
+    graph_max_radius: float = 8.0,
+    graph_max_neighbors: int = 12,
+    reprocess: bool = False
+) -> dict:
+    """
+    Process atomic structure data into graph format.
+    Parameters:
+        data_path (str, optional): Path to directory containing structure files (server-side).
+                                   Ignored when structure_contents is provided.
+        structure_contents (dict, optional): Dictionary mapping filenames to file contents.
+                                            Filenames must be bare names without directory parts.
+                                            Example: {"structure1.cif": "CIF content...", "structure2.cif": "..."}
+        targets_csv (str, optional): Content of targets.csv file. Required with structure_contents.
+        target_index (int): Index of target column in targets.csv (default: 0).
+        graph_max_radius (float): Maximum radius for edges in graph (default: 8.0).
+        graph_max_neighbors (int): Maximum number of neighbors per atom (default: 12).
+        reprocess (bool): Whether to reprocess data even if processed files exist.
+    Returns:
+        dict: Contains processing status and dataset information. For uploaded content
+              the result also carries "temp_data_path"; that directory is kept on success
+              and removed if processing fails. On any failure the result is
+              {"success": False, "error": <message>}.
+    Example usage with direct content:
+        process_structure_data(
+            structure_contents={"struct1.cif": "CIF content...", "struct2.cif": "..."},
+            targets_csv="struct1,1.5\\nstruct2,2.3"
+        )
+    """
+    import shutil
+    temp_dir = None
+    try:
+        if not MATDEEPLEARN_AVAILABLE:
+            return {"success": False, "error": "MatDeepLearn not available"}
+        # Uploaded content takes precedence over data_path.
+        if structure_contents is not None:
+            if targets_csv is None:
+                return {"success": False, "error": "targets_csv is required when providing structure_contents"}
+            temp_dir = _write_uploaded_dataset(structure_contents, targets_csv)
+            data_path = temp_dir
+        if data_path is None:
+            return {"success": False, "error": "Either data_path or structure_contents must be provided"}
+        if not os.path.exists(data_path):
+            return {"success": False, "error": f"Data path not found: {data_path}"}
+        # NOTE(review): data_format is fixed to "json" while the docstring example
+        # uploads .cif files - confirm what process.get_dataset expects for uploads.
+        processing_args = {
+            "dataset_type": "inmemory",
+            "data_path": data_path,
+            "target_path": "targets.csv",
+            "dictionary_source": "default",
+            "dictionary_path": "atom_dict.json",
+            "data_format": "json",
+            "verbose": "True",
+            "graph_max_radius": graph_max_radius,
+            "graph_max_neighbors": graph_max_neighbors,
+            "voronoi": "False",
+            "edge_features": "True",
+            "graph_edge_length": 50,
+            "SM_descriptor": "False",
+            "SOAP_descriptor": "False"
+        }
+        dataset = process.get_dataset(
+            data_path,
+            target_index,
+            "True" if reprocess else "False",
+            processing_args
+        )
+        # Describe the first sample once instead of indexing the dataset three times.
+        if len(dataset) > 0:
+            first = dataset[0]
+            sample_data = {
+                "num_nodes": first.x.shape[0],
+                "num_node_features": first.x.shape[1],
+                "num_edges": first.edge_index.shape[1]
+            }
+        else:
+            sample_data = {"num_nodes": 0, "num_node_features": 0, "num_edges": 0}
+        result = {
+            "success": True,
+            "dataset_size": len(dataset),
+            "sample_data": sample_data,
+            "data_path": data_path,
+            "is_temporary": temp_dir is not None
+        }
+        # Note: Don't delete temp_dir on success, it may be needed for training
+        if temp_dir:
+            result["temp_data_path"] = temp_dir
+        return result
+    except Exception as e:
+        # The failure response does not expose the temp directory, so nobody could
+        # reuse or clean it up later; remove it here.
+        if temp_dir is not None:
+            shutil.rmtree(temp_dir, ignore_errors=True)
+        return {"success": False, "error": str(e)}
@mcp.tool(name="train_model", description="Train a GNN model on processed structure data.")
def train_model(
    data_path: str,
    model_name: str = "CGCNN_demo",
    epochs: int = 100,
    batch_size: int = 32,
    learning_rate: float = 0.002,
    train_ratio: float = 0.8,
    val_ratio: float = 0.1,
    test_ratio: float = 0.1,
    save_model: bool = True,
    model_path: str = "trained_model.pth"
) -> dict:
    """
    Train a GNN model on processed structure data.

    The hyper-parameters are validated before any configuration or dataset
    work starts, so an impossible request (zero epochs, split ratios that add
    up to more than 1, ...) yields a specific error message instead of a
    failure from deep inside the training code.

    Parameters:
        data_path (str): Path to directory containing processed structure data.
        model_name (str): Key of the model in the "Models" section of
            config.yml (default: 'CGCNN_demo').
        epochs (int): Number of training epochs, must be >= 1 (default: 100).
        batch_size (int): Training batch size, must be >= 1 (default: 32).
        learning_rate (float): Learning rate, must be > 0 (default: 0.002).
        train_ratio (float): Ratio of data for training, must be > 0 (default: 0.8).
        val_ratio (float): Ratio of data for validation (default: 0.1).
        test_ratio (float): Ratio of data for testing (default: 0.1).
        save_model (bool): Whether to save the trained model (default: True).
        model_path (str): Path to save the trained model (default: 'trained_model.pth').
    Returns:
        dict: On success, "success": True plus the model name, epoch count,
        train/val/test errors (None when the trainer returns nothing) and the
        save location. On any failure, {"success": False, "error": <message>}.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}
        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # Fail fast on arguments that cannot describe a valid training run.
        if epochs < 1:
            return {"success": False, "error": f"epochs must be >= 1, got {epochs}"}
        if batch_size < 1:
            return {"success": False, "error": f"batch_size must be >= 1, got {batch_size}"}
        if learning_rate <= 0:
            return {"success": False, "error": f"learning_rate must be > 0, got {learning_rate}"}
        ratios = (train_ratio, val_ratio, test_ratio)
        # The small tolerance keeps splits that sum to 1 only up to float
        # rounding from being rejected here.
        if train_ratio <= 0 or min(ratios) < 0 or sum(ratios) > 1.0 + 1e-9:
            return {
                "success": False,
                "error": (
                    "Invalid split ratios: train_ratio must be > 0, no ratio may be "
                    f"negative and their sum must not exceed 1 (got train={train_ratio}, "
                    f"val={val_ratio}, test={test_ratio})"
                )
            }

        # Load default config
        config_path = os.path.join(project_root, "config.yml")
        with open(config_path, "r") as f:
            # NOTE(review): config.yml is a project-local file; if it can ever
            # come from an untrusted source, switch to yaml.safe_load.
            config = yaml.load(f, Loader=yaml.FullLoader)
        if model_name not in config.get("Models", {}):
            return {"success": False, "error": f"Model '{model_name}' not found"}

        # Job-level settings; boolean flags are passed as the strings
        # "True"/"False", as in the rest of this file.
        job_config = {
            "job_name": "mcp_train_job",
            "reprocess": "False",
            "model": model_name,
            "load_model": "False",
            "save_model": "True" if save_model else "False",
            "model_path": model_path,
            "write_output": "True",
            "parallel": "False",
            # Integer bounds and a plain Python int rather than a float bound
            # and a NumPy scalar.
            "seed": int(np.random.randint(1, 10**6))
        }
        training_config = {
            "target_index": 0,
            "loss": "l1_loss",
            "train_ratio": train_ratio,
            "val_ratio": val_ratio,
            "test_ratio": test_ratio,
            "verbosity": 5
        }
        # Copy so the per-call overrides do not leak into the loaded config.
        model_config = config["Models"][model_name].copy()
        model_config["epochs"] = epochs
        model_config["batch_size"] = batch_size
        model_config["lr"] = learning_rate

        # Use the GPU when at least one CUDA device is visible.
        world_size = torch.cuda.device_count()
        rank = "cuda" if world_size else "cpu"

        error_values = training.train_regular(
            rank,
            world_size,
            data_path,
            job_config,
            training_config,
            model_config
        )
        has_errors = error_values is not None
        return {
            "success": True,
            "model_name": model_name,
            "epochs": epochs,
            "train_error": float(error_values[0]) if has_errors else None,
            "val_error": float(error_values[1]) if has_errors else None,
            "test_error": float(error_values[2]) if has_errors else None,
            "model_saved": save_model,
            "model_path": model_path if save_model else None
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
@mcp.tool(name="predict_properties", description="Use a trained model to predict properties of new structures.")
def predict_properties(
    data_path: str,
    model_path: str,
    target_index: int = 0
) -> dict:
    """
    Run a previously trained model over the structures found in a directory.

    Parameters:
        data_path (str): Directory holding the structure files to predict on.
        model_path (str): Location of the trained model file (.pth).
        target_index (int): Index of the target column (default: 0).
    Returns:
        dict: On success, the dataset size, the test error reported by the
        predictor and the name of the CSV output file; otherwise
        {"success": False, "error": <message>}.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        # Both inputs must exist on disk; the data path is checked first.
        required_paths = (
            (data_path, f"Data path not found: {data_path}"),
            (model_path, f"Model file not found: {model_path}"),
        )
        for location, message in required_paths:
            if not os.path.exists(location):
                return {"success": False, "error": message}

        # Load the dataset without forcing a reprocess.
        structures = process.get_dataset(data_path, target_index, "False")

        job_name = "mcp_predict_job"
        prediction_job = {
            "job_name": job_name,
            "model_path": model_path,
            "write_output": "True"
        }
        # Evaluate with the L1 loss.
        error = training.predict(structures, "l1_loss", prediction_job)

        outcome = {"success": True}
        outcome["dataset_size"] = len(structures)
        outcome["test_error"] = float(error)
        outcome["output_file"] = f"{job_name}_predicted_outputs.csv"
        return outcome
    except Exception as e:
        return {"success": False, "error": str(e)}
@mcp.tool(name="cross_validation", description="Perform k-fold cross validation on a dataset.")
def cross_validation(
    data_path: str,
    model_name: str = "CGCNN_demo",
    cv_folds: int = 5,
    epochs: int = 100
) -> dict:
    """
    Perform k-fold cross validation on a dataset.

    The fold and epoch counts are validated up front so that a meaningless
    request (fewer than two folds, zero epochs) is answered with a specific
    error instead of a failure from inside the training code.

    Parameters:
        data_path (str): Path to directory containing structure data.
        model_name (str): Key of the model in the "Models" section of
            config.yml (default: 'CGCNN_demo').
        cv_folds (int): Number of cross-validation folds, must be >= 2 (default: 5).
        epochs (int): Number of training epochs per fold, must be >= 1 (default: 100).
    Returns:
        dict: On success, the model name, fold count, cross-validation error
        (None when the trainer returns nothing) and output file name;
        otherwise {"success": False, "error": <message>}.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}
        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # k-fold validation needs at least two folds: with one fold there is
        # no data left to train on once the held-out fold is removed.
        if cv_folds < 2:
            return {"success": False, "error": f"cv_folds must be >= 2, got {cv_folds}"}
        if epochs < 1:
            return {"success": False, "error": f"epochs must be >= 1, got {epochs}"}

        # Load config
        config_path = os.path.join(project_root, "config.yml")
        with open(config_path, "r") as f:
            # NOTE(review): config.yml is a project-local file; if it can ever
            # come from an untrusted source, switch to yaml.safe_load.
            config = yaml.load(f, Loader=yaml.FullLoader)
        if model_name not in config.get("Models", {}):
            return {"success": False, "error": f"Model '{model_name}' not found"}

        job_name = "mcp_cv_job"
        job_config = {
            "job_name": job_name,
            "reprocess": "False",
            "model": model_name,
            "cv_folds": cv_folds,
            "write_output": "True",
            "parallel": "False",
            # Integer bounds and a plain Python int rather than a float bound
            # and a NumPy scalar.
            "seed": int(np.random.randint(1, 10**6))
        }
        training_config = {
            "target_index": 0,
            "loss": "l1_loss",
            "verbosity": 5
        }
        # Copy so the epoch override does not leak into the loaded config.
        model_config = config["Models"][model_name].copy()
        model_config["epochs"] = epochs

        # Use the GPU when at least one CUDA device is visible.
        world_size = torch.cuda.device_count()
        rank = "cuda" if world_size else "cpu"

        cv_error = training.train_CV(
            rank,
            world_size,
            data_path,
            job_config,
            training_config,
            model_config
        )
        return {
            "success": True,
            "model_name": model_name,
            "cv_folds": cv_folds,
            "cv_error": float(cv_error) if cv_error is not None else None,
            "output_file": f"{job_name}_CV_outputs.csv"
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
@mcp.tool(name="analyze_structure", description="Analyze atomic structure data. You can pass file content directly (for CIF, XYZ, POSCAR formats) or a file path on the server.")
def analyze_structure(
    file_content: Optional[str] = None,
    file_format: Optional[str] = None,
    structure_file: Optional[str] = None
) -> dict:
    """
    Analyze an atomic structure given either as file content or as a server path.

    Parameters:
        file_content (str, optional): The content of the structure file (CIF, XYZ, POSCAR, JSON format).
                                      Pass the actual file content directly here.
        file_format (str, optional): Format of the file content ('cif', 'xyz', 'vasp',
                                     'poscar', 'json', 'extxyz'; case-insensitive).
                                     Required when file_content is provided.
        structure_file (str, optional): Path to a structure file on the server (legacy option).
                                        Only used when file_content is not given.
    Returns:
        dict: On success: atom count, chemical formula, sorted element list,
        per-element counts, periodicity flags, cell (None for non-periodic
        structures), the first 10 atomic positions and neighbor/distance
        statistics within an 8.0 cutoff. Distance statistics that are undefined
        (for example "min_distance" for a single-atom structure) are None.
        On failure: {"success": False, "error": <message>}.
    Example usage:
        analyze_structure(file_content="your CIF file content here...", file_format="cif")
    """
    try:
        # Imported inside the try block so that a missing ASE installation is
        # reported as an error result of this tool.
        import ase.io
        from collections import Counter

        structure = None
        # Method 1: Direct file content (preferred for remote access)
        if file_content is not None:
            if file_format is None:
                return {"success": False, "error": "file_format is required when providing file_content. Use 'cif', 'xyz', 'vasp', or 'json'."}
            # Map the accepted format names onto the names passed to ase.io.read.
            format_map = {
                'cif': 'cif',
                'xyz': 'xyz',
                'vasp': 'vasp',
                'poscar': 'vasp',
                'json': 'json',
                'extxyz': 'extxyz'
            }
            fmt = format_map.get(file_format.lower())
            if fmt is None:
                return {"success": False, "error": f"Unsupported format: {file_format}. Supported: cif, xyz, vasp, poscar, json, extxyz"}
            # The content is written to a temporary file and read back from its
            # path; the file is removed again even if parsing fails.
            with tempfile.NamedTemporaryFile(mode='w', suffix=f'.{fmt}', delete=False) as tmp:
                tmp.write(file_content)
                tmp_path = tmp.name
            try:
                structure = ase.io.read(tmp_path, format=fmt)
            finally:
                os.unlink(tmp_path)
        # Method 2: File path on server (legacy)
        elif structure_file is not None:
            if not os.path.exists(structure_file):
                return {"success": False, "error": f"Structure file not found: {structure_file}. Tip: For remote MCP, pass file_content directly instead of file path."}
            structure = ase.io.read(structure_file)
        else:
            return {"success": False, "error": "Either file_content (with file_format) or structure_file must be provided."}

        # Basic composition and geometry
        symbols = structure.get_chemical_symbols()
        positions = structure.get_positions().tolist()
        pbc = structure.pbc.tolist()
        is_periodic = any(pbc)
        cell = structure.get_cell().tolist() if is_periodic else None
        # Sorted so that repeated calls return elements in the same order.
        symbol_counts = Counter(symbols)
        elements = sorted(symbol_counts)

        # Pairwise distances (minimum-image convention requested via mic=True).
        distance_matrix = structure.get_all_distances(mic=True)
        cutoff_radius = 8.0
        # Per-atom count of other atoms closer than the cutoff; zero distances
        # (the diagonal) are excluded.
        neighbor_counts = ((distance_matrix > 0) & (distance_matrix < cutoff_radius)).sum(axis=1)
        has_atoms = neighbor_counts.size > 0
        # A single-atom structure has no non-zero distances; reducing over the
        # empty selection would raise, so report None instead.
        positive_distances = distance_matrix[distance_matrix > 0]

        return {
            "success": True,
            "num_atoms": len(structure),
            "chemical_formula": structure.get_chemical_formula(),
            "elements": elements,
            "element_counts": {elem: symbol_counts[elem] for elem in elements},
            "has_periodicity": is_periodic,
            "pbc": pbc,
            "cell": cell,
            "positions": positions[:10],  # At most the first 10 positions
            "average_neighbors": float(neighbor_counts.mean()) if has_atoms else None,
            "min_neighbors": int(neighbor_counts.min()) if has_atoms else None,
            "max_neighbors": int(neighbor_counts.max()) if has_atoms else None,
            "min_distance": float(positive_distances.min()) if positive_distances.size else None,
            "max_distance": float(distance_matrix.max()) if distance_matrix.size else None
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
@mcp.tool(name="compare_models", description="Compare performance of different GNN models on a dataset.")
def compare_models(
    data_path: str,
    model_list: Optional[List[str]] = None,
    epochs: int = 50
) -> dict:
    """
    Compare performance of different GNN models on a dataset.

    Each model is trained through train_model with save_model=False; a model
    that fails is recorded with its error and does not stop the comparison.

    Parameters:
        data_path (str): Path to directory containing structure data.
        model_list (List[str], optional): Models to compare. When None, the
            models "CGCNN_demo", "GCN_demo" and "SchNet_demo" are used.
        epochs (int): Number of training epochs per model (default: 50).
    Returns:
        dict: "results" maps each model name to its train/val/test errors or
        to {"error": <message>}; "best_model" is the model with the lowest
        test error (None if no model produced one) and "best_test_error" its
        value. On a failure outside the per-model loop:
        {"success": False, "error": <message>}.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}
        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}
        if model_list is None:
            model_list = ["CGCNN_demo", "GCN_demo", "SchNet_demo"]

        # Depending on the FastMCP version, @mcp.tool may leave `train_model`
        # bound to a tool object rather than the plain function; such objects
        # expose the wrapped function as `.fn`. Fall back to the name itself
        # when it is still the function.
        run_training = getattr(train_model, "fn", train_model)

        results = {}
        for model_name in model_list:
            try:
                outcome = run_training(
                    data_path=data_path,
                    model_name=model_name,
                    epochs=epochs,
                    save_model=False
                )
                if outcome["success"]:
                    results[model_name] = {
                        "train_error": outcome["train_error"],
                        "val_error": outcome["val_error"],
                        "test_error": outcome["test_error"]
                    }
                else:
                    results[model_name] = {"error": outcome["error"]}
            except Exception as e:
                # Keep comparing the remaining models.
                results[model_name] = {"error": str(e)}

        # Pick the model with the lowest available test error; on ties the
        # first model in iteration order wins.
        best_model = None
        best_error = float("inf")
        for candidate, metrics in results.items():
            test_error = metrics.get("test_error")
            if test_error is not None and test_error < best_error:
                best_error = test_error
                best_model = candidate
        return {
            "success": True,
            "results": results,
            "best_model": best_model,
            "best_test_error": best_error if best_model else None
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
@mcp.tool(name="get_dataset_info", description="Get information about a dataset directory or uploaded dataset.")
def get_dataset_info(
    data_path: Optional[str] = None,
    structure_files: Optional[List[str]] = None,
    targets_csv_content: Optional[str] = None
) -> dict:
    """
    Get information about a dataset.

    When targets_csv_content is given it takes precedence and data_path is
    ignored; otherwise the directory at data_path is inspected.

    Parameters:
        data_path (str, optional): Path to directory containing structure data (server-side).
        structure_files (List[str], optional): List of structure filenames; only used
            together with targets_csv_content, to report extension counts.
        targets_csv_content (str, optional): Content of targets.csv file to analyze.
    Returns:
        dict: Dataset information. For uploaded content: sample count (blank
        rows are not counted), min/max/mean of the values in the second column
        that parse as numbers, and extension counts of structure_files. For a
        server path: extension counts of the files in the directory, presence
        of targets.csv / atom_dict.json / processed, and the number of
        non-blank rows in targets.csv. On failure:
        {"success": False, "error": <message>}.
    """
    try:
        import csv
        from collections import Counter
        from io import StringIO

        def _is_data_row(row):
            # csv.reader yields an empty list for a blank line; such rows (and
            # rows made only of empty cells) are not samples.
            return any(cell.strip() for cell in row)

        # If analyzing uploaded content
        if targets_csv_content is not None:
            rows = [row for row in csv.reader(StringIO(targets_csv_content)) if _is_data_row(row)]
            # Collect the second column where it is numeric; anything else
            # (for example a header line) is left out of the statistics.
            target_values = []
            for row in rows:
                if len(row) >= 2:
                    try:
                        target_values.append(float(row[1]))
                    except ValueError:
                        continue
            result = {
                "success": True,
                "source": "uploaded_content",
                "num_samples": len(rows),
                "has_targets_csv": True,
                "ready_for_training": True
            }
            if target_values:
                result["target_statistics"] = {
                    "min": min(target_values),
                    "max": max(target_values),
                    "mean": sum(target_values) / len(target_values)
                }
            if structure_files:
                result["file_extensions"] = dict(
                    Counter(os.path.splitext(name)[1].lower() for name in structure_files)
                )
                result["num_structure_files"] = len(structure_files)
            return result

        # Traditional path-based analysis
        if data_path is None:
            return {"success": False, "error": "Either data_path or targets_csv_content must be provided"}
        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # Count regular files by extension; sub-directories are not files and
        # would otherwise show up as extension-less entries.
        extensions = dict(
            Counter(
                os.path.splitext(name)[1].lower()
                for name in os.listdir(data_path)
                if os.path.isfile(os.path.join(data_path, name))
            )
        )

        # Check for required files
        targets_path = os.path.join(data_path, "targets.csv")
        has_targets = os.path.exists(targets_path)
        has_atom_dict = os.path.exists(os.path.join(data_path, "atom_dict.json"))
        has_processed = os.path.exists(os.path.join(data_path, "processed"))

        # Read targets if available
        num_samples = 0
        if has_targets:
            # newline="" is what the csv module documentation asks for when
            # handing a file object to csv.reader.
            with open(targets_path, newline="") as f:
                num_samples = sum(1 for row in csv.reader(f) if _is_data_row(row))
        return {
            "success": True,
            "source": "server_path",
            "data_path": data_path,
            "file_extensions": extensions,
            "has_targets_csv": has_targets,
            "has_atom_dict": has_atom_dict,
            "has_processed_data": has_processed,
            "num_samples": num_samples,
            "ready_for_training": has_targets
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
@mcp.tool(name="quick_structure_analysis", description="Quick analysis of a structure file content without needing a server path. Ideal for analyzing uploaded files from Cursor.")
def quick_structure_analysis(
    file_content: str,
    file_format: str,
    include_positions: bool = False,
    include_distances: bool = True
) -> dict:
    """
    Perform quick analysis on structure file content uploaded directly.

    This is the recommended tool for analyzing structures when using remote MCP.

    Parameters:
        file_content (str): The complete content of the structure file.
        file_format (str): Format of the file - 'cif', 'xyz', 'vasp'/'poscar', 'json', 'extxyz'
            (case-insensitive).
        include_positions (bool): Whether to include atomic positions in output (default: False).
        include_distances (bool): Whether to include distance analysis (default: True).
    Returns:
        dict: On success: atom count, formulas, sorted element list,
        per-element counts, periodicity flags, cell and volume (None when not
        available), optional positions/symbols, optional "distance_analysis"
        and a "gnn_suitable" summary. Distance statistics that are undefined
        (for example "min_distance" for a single-atom structure) are None.
        On failure: {"success": False, "error": <message>}.
    Example:
        quick_structure_analysis(
            file_content="data_NaCl\\n_cell_length_a 5.64...",
            file_format="cif"
        )
    """
    try:
        # Imported inside the try block so that a missing ASE installation is
        # reported as an error result of this tool.
        import ase.io
        from collections import Counter

        # Map the accepted format names onto the names passed to ase.io.read.
        format_map = {
            'cif': 'cif',
            'xyz': 'xyz',
            'vasp': 'vasp',
            'poscar': 'vasp',
            'json': 'json',
            'extxyz': 'extxyz'
        }
        fmt = format_map.get(file_format.lower())
        if fmt is None:
            return {
                "success": False,
                "error": f"Unsupported format: {file_format}. Supported: cif, xyz, vasp, poscar, json, extxyz"
            }

        # The content is written to a temporary file and read back from its
        # path; the file is removed again even if parsing fails.
        with tempfile.NamedTemporaryFile(mode='w', suffix=f'.{fmt}', delete=False) as tmp:
            tmp.write(file_content)
            tmp_path = tmp.name
        try:
            structure = ase.io.read(tmp_path, format=fmt)
        finally:
            os.unlink(tmp_path)

        # Basic analysis
        symbols = structure.get_chemical_symbols()
        num_atoms = len(structure)
        pbc = structure.pbc.tolist()
        is_periodic = any(pbc)
        cell = structure.get_cell().tolist() if is_periodic else None
        # Sorted so that repeated calls return elements in the same order.
        symbol_counts = Counter(symbols)
        elements = sorted(symbol_counts)

        volume = None
        if is_periodic:
            try:
                volume = float(structure.get_volume())
            except ValueError:
                # NOTE(review): ASE is understood to raise ValueError from
                # get_volume() when the cell does not span three dimensions
                # (e.g. slab/wire cells) - confirm against the ASE docs.
                # Report "no volume" instead of failing the whole analysis.
                volume = None

        result = {
            "success": True,
            "num_atoms": num_atoms,
            "chemical_formula": structure.get_chemical_formula(),
            # NOTE(review): mode='reduce' is an ASE formula mode; confirm it
            # yields the formula intended for the "reduced_formula" key (ASE
            # also offers empirical=True for gcd-reduced formulas).
            "reduced_formula": structure.get_chemical_formula(mode='reduce'),
            "elements": elements,
            "element_counts": {elem: symbol_counts[elem] for elem in elements},
            "has_periodicity": is_periodic,
            "pbc": pbc,
            "cell_parameters": cell,
            "volume": volume,
        }

        if include_positions:
            result["positions"] = structure.get_positions().tolist()
            result["symbols"] = symbols

        if include_distances:
            # Pairwise distances (minimum-image convention requested via mic=True).
            distance_matrix = structure.get_all_distances(mic=True)
            cutoff_radius = 8.0
            # Per-atom count of other atoms closer than the cutoff; zero
            # distances (the diagonal) are excluded.
            neighbor_counts = ((distance_matrix > 0) & (distance_matrix < cutoff_radius)).sum(axis=1)
            has_atoms = neighbor_counts.size > 0
            # A single-atom structure has no non-zero distances; reducing over
            # the empty selection would raise, so report None instead.
            positive_distances = distance_matrix[distance_matrix > 0]
            result["distance_analysis"] = {
                "cutoff_radius": cutoff_radius,
                "average_neighbors": float(neighbor_counts.mean()) if has_atoms else None,
                "min_neighbors": int(neighbor_counts.min()) if has_atoms else None,
                "max_neighbors": int(neighbor_counts.max()) if has_atoms else None,
                "min_distance": float(positive_distances.min()) if positive_distances.size else None,
                "max_distance": float(distance_matrix.max()) if distance_matrix.size else None
            }

        # Check if suitable for GNN: the only computed criterion is the atom count.
        enough_atoms = num_atoms >= 2
        result["gnn_suitable"] = {
            "has_enough_atoms": enough_atoms,
            "has_3d_coordinates": True,
            "is_periodic": is_periodic,
            "recommendation": "Suitable for GNN training" if enough_atoms else "Too few atoms"
        }
        return result
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
def create_app() -> FastMCP:
    """
    Hand back the module-level FastMCP application.

    No new instance is built here; the object returned is the shared `mcp`
    instance of this module.

    Returns:
        FastMCP: The module's FastMCP application instance.
    """
    app = mcp
    return app