SEUyishu committed on
Commit
6f5c6b4
·
verified ·
1 Parent(s): aa61e51

Update mcp_output/mcp_plugin/mcp_service.py

Browse files
Files changed (1) hide show
  1. mcp_output/mcp_plugin/mcp_service.py +894 -640
mcp_output/mcp_plugin/mcp_service.py CHANGED
@@ -1,640 +1,894 @@
1
- """
2
- MatDeepLearn MCP Service
3
- A Model Context Protocol service for materials property prediction using Graph Neural Networks.
4
- """
5
-
6
- import os
7
- import sys
8
- import json
9
- import tempfile
10
- import yaml
11
- import numpy as np
12
- from typing import Optional, List, Dict, Any
13
- from pathlib import Path
14
-
15
- # Add MatDeepLearn to path
16
- project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
17
- if project_root not in sys.path:
18
- sys.path.insert(0, project_root)
19
-
20
- from fastmcp import FastMCP
21
-
22
- # Import MatDeepLearn modules
23
- try:
24
- import torch
25
- from matdeeplearn import models, process, training
26
- from matdeeplearn.models.utils import model_summary
27
- MATDEEPLEARN_AVAILABLE = True
28
- except ImportError as e:
29
- MATDEEPLEARN_AVAILABLE = False
30
- IMPORT_ERROR = str(e)
31
-
32
- mcp = FastMCP("matdeeplearn_service")
33
-
34
-
35
@mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
def check_environment() -> dict:
    """
    Report whether MatDeepLearn imported successfully and which GPUs torch can see.

    Returns:
        dict: ``success=True`` together with the torch version, GPU
        availability, GPU count, name of device 0 and the list of demo model
        names; or ``success=False`` with an ``error`` message when the
        MatDeepLearn import failed or any query raised.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            # IMPORT_ERROR is only bound when the guarded import at module load failed.
            return {
                "success": False,
                "error": f"MatDeepLearn not available: {IMPORT_ERROR}"
            }

        has_gpu = torch.cuda.is_available()
        if has_gpu:
            device_total = torch.cuda.device_count()
            first_device_name = torch.cuda.get_device_name(0)
        else:
            device_total = 0
            first_device_name = "N/A"

        demo_models = [
            "CGCNN_demo", "MPNN_demo", "SchNet_demo",
            "MEGNet_demo", "GCN_demo", "SOAP_demo", "SM_demo"
        ]

        report = {
            "success": True,
            "matdeeplearn_available": True,
            "torch_version": torch.__version__,
            "gpu_available": has_gpu,
            "gpu_count": device_total,
            "gpu_name": first_device_name,
            "available_models": demo_models
        }
        return report
    except Exception as e:
        return {"success": False, "error": str(e)}
68
-
69
-
70
@mcp.tool(name="list_available_models", description="List all available GNN models in MatDeepLearn.")
def list_available_models() -> dict:
    """
    Return the static catalog of demo models exposed by this service.

    Returns:
        dict: ``success=True``, a ``models`` mapping from model key to its
        ``name`` / ``description`` / ``paper`` entry, and ``total_models``.
    """
    try:
        # (key, full name, description, reference) — one row per model.
        catalog = (
            ("CGCNN_demo",
             "Crystal Graph Convolutional Neural Network",
             "A GNN for predicting material properties using crystal graphs.",
             "Xie & Grossman, PRL 2018"),
            ("MPNN_demo",
             "Message Passing Neural Network",
             "General message passing framework for molecular graphs.",
             "Gilmer et al., ICML 2017"),
            ("SchNet_demo",
             "SchNet",
             "Continuous-filter convolutional neural network for modeling quantum interactions.",
             "Schütt et al., JCP 2017"),
            ("MEGNet_demo",
             "MatErials Graph Network",
             "Graph network with global state for materials property prediction.",
             "Chen et al., Chem. Mater. 2019"),
            ("GCN_demo",
             "Graph Convolutional Network",
             "Standard graph convolutional network architecture.",
             "Kipf & Welling, ICLR 2017"),
            ("SOAP_demo",
             "Smooth Overlap of Atomic Positions",
             "Descriptor-based method using SOAP features.",
             "Bartók et al., PRB 2013"),
            ("SM_demo",
             "Sine Matrix",
             "Descriptor-based method using Sine/Coulomb matrix features.",
             "Various"),
        )

        models_info = {
            key: {"name": title, "description": summary, "paper": citation}
            for key, title, summary, citation in catalog
        }

        return {
            "success": True,
            "models": models_info,
            "total_models": len(models_info)
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
124
-
125
-
126
@mcp.tool(name="get_model_config", description="Get the default configuration for a specific model.")
def get_model_config(model_name: str) -> dict:
    """
    Get the default configuration for a specific GNN model.

    Reads ``config.yml`` from ``project_root`` and returns the entry for
    ``model_name`` from its ``Models`` section together with the
    ``Processing`` and ``Training`` sections.

    Parameters:
        model_name (str): Name of the model (e.g., 'CGCNN_demo', 'SchNet_demo').

    Returns:
        dict: ``success=True`` with ``model_name``, ``model_config``,
        ``processing_config`` and ``training_config``; or ``success=False``
        with an ``error`` message (missing config file, unknown model, or any
        exception raised while reading the file).
    """
    try:
        config_path = os.path.join(project_root, "config.yml")

        if not os.path.exists(config_path):
            return {"success": False, "error": "Config file not found"}

        with open(config_path, "r") as f:
            # NOTE(review): FullLoader is kept as-is; config.yml is a project
            # file here. Switch to yaml.safe_load if it can ever be user-supplied.
            # An empty YAML document parses to None, so fall back to {}.
            config = yaml.load(f, Loader=yaml.FullLoader) or {}

        # A "Models:" key with no value also parses to None.
        available_models = config.get("Models") or {}

        if model_name not in available_models:
            return {
                "success": False,
                "error": f"Model '{model_name}' not found. Available models: {list(available_models.keys())}"
            }

        return {
            "success": True,
            "model_name": model_name,
            "model_config": available_models[model_name],
            "processing_config": config.get("Processing", {}),
            "training_config": config.get("Training", {})
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
165
-
166
-
167
@mcp.tool(name="process_structure_data", description="Process atomic structure data into graph format for GNN training.")
def process_structure_data(
    data_path: str,
    target_index: int = 0,
    graph_max_radius: float = 8.0,
    graph_max_neighbors: int = 12,
    reprocess: bool = False
) -> dict:
    """
    Build the graph dataset for a directory of structures via ``process.get_dataset``.

    Parameters:
        data_path (str): Path to directory containing structure files and targets.csv.
        target_index (int): Index of target column in targets.csv (default: 0).
        graph_max_radius (float): Maximum radius for edges in graph (default: 8.0).
        graph_max_neighbors (int): Maximum number of neighbors per atom (default: 12).
        reprocess (bool): Whether to reprocess data even if processed files exist.

    Returns:
        dict: ``success=True`` with ``dataset_size``, a ``sample_data`` summary
        of the first item (all zeros for an empty dataset) and ``data_path``;
        or ``success=False`` with an ``error`` message.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # Boolean-like options are passed as the strings "True"/"False",
        # matching how this file passes them everywhere else.
        processing_args = dict(
            dataset_type="inmemory",
            data_path=data_path,
            target_path="targets.csv",
            dictionary_source="default",
            dictionary_path="atom_dict.json",
            data_format="json",
            verbose="True",
            graph_max_radius=graph_max_radius,
            graph_max_neighbors=graph_max_neighbors,
            voronoi="False",
            edge_features="True",
            graph_edge_length=50,
            SM_descriptor="False",
            SOAP_descriptor="False",
        )

        reprocess_flag = str(bool(reprocess))
        dataset = process.get_dataset(data_path, target_index, reprocess_flag, processing_args)

        total = len(dataset)
        if total > 0:
            first = dataset[0]
            sample = {
                "num_nodes": first.x.shape[0],
                "num_node_features": first.x.shape[1],
                "num_edges": first.edge_index.shape[1]
            }
        else:
            sample = {"num_nodes": 0, "num_node_features": 0, "num_edges": 0}

        return {
            "success": True,
            "dataset_size": total,
            "sample_data": sample,
            "data_path": data_path
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
231
-
232
-
233
@mcp.tool(name="train_model", description="Train a GNN model on processed structure data.")
def train_model(
    data_path: str,
    model_name: str = "CGCNN_demo",
    epochs: int = 100,
    batch_size: int = 32,
    learning_rate: float = 0.002,
    train_ratio: float = 0.8,
    val_ratio: float = 0.1,
    test_ratio: float = 0.1,
    save_model: bool = True,
    model_path: str = "trained_model.pth"
) -> dict:
    """
    Train a GNN model on processed structure data.

    Loads the model defaults from ``config.yml`` under ``project_root``,
    overrides epochs / batch size / learning rate with the arguments, and
    calls ``training.train_regular``.

    Parameters:
        data_path (str): Path to directory containing processed structure data.
        model_name (str): Name of the model to train (default: 'CGCNN_demo').
        epochs (int): Number of training epochs (default: 100).
        batch_size (int): Training batch size (default: 32).
        learning_rate (float): Learning rate (default: 0.002).
        train_ratio (float): Ratio of data for training (default: 0.8).
        val_ratio (float): Ratio of data for validation (default: 0.1).
        test_ratio (float): Ratio of data for testing (default: 0.1).
        save_model (bool): Whether to save the trained model (default: True).
        model_path (str): Path to save the trained model (default: 'trained_model.pth').

    Returns:
        dict: ``success=True`` with train/val/test errors (``None`` when the
        trainer returns ``None``), or ``success=False`` with an ``error``
        message (invalid ratios, missing data/config, unknown model, or any
        exception raised during training).
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # Reject impossible splits before any expensive work starts.
        # The small tolerance absorbs float rounding in sums such as 0.7 + 0.2 + 0.1.
        ratios = (train_ratio, val_ratio, test_ratio)
        if any(r < 0 for r in ratios) or sum(ratios) > 1.0 + 1e-9:
            return {
                "success": False,
                "error": "train_ratio, val_ratio and test_ratio must be non-negative and sum to at most 1"
            }

        # Load default config
        config_path = os.path.join(project_root, "config.yml")
        if not os.path.exists(config_path):
            return {"success": False, "error": "Config file not found"}

        with open(config_path, "r") as f:
            # NOTE(review): FullLoader kept as-is for the project's own config;
            # use yaml.safe_load if this file can ever come from a user.
            # Empty documents parse to None, hence the `or {}`.
            config = yaml.load(f, Loader=yaml.FullLoader) or {}

        available_models = config.get("Models") or {}
        if model_name not in available_models:
            return {"success": False, "error": f"Model '{model_name}' not found"}

        # Prepare configuration
        job_config = {
            "job_name": "mcp_train_job",
            "reprocess": "False",
            "model": model_name,
            "load_model": "False",
            "save_model": "True" if save_model else "False",
            "model_path": model_path,
            "write_output": "True",
            "parallel": "False",
            # Integer bounds and a plain Python int instead of a numpy scalar.
            "seed": int(np.random.randint(1, 1_000_000))
        }

        training_config = {
            "target_index": 0,
            "loss": "l1_loss",
            "train_ratio": train_ratio,
            "val_ratio": val_ratio,
            "test_ratio": test_ratio,
            "verbosity": 5
        }

        # Copy so the overrides never leak back into the loaded config.
        model_config = available_models[model_name].copy()
        model_config["epochs"] = epochs
        model_config["batch_size"] = batch_size
        model_config["lr"] = learning_rate

        # Determine device
        world_size = torch.cuda.device_count()
        rank = "cpu" if world_size == 0 else "cuda"

        # Train model
        error_values = training.train_regular(
            rank,
            world_size,
            data_path,
            job_config,
            training_config,
            model_config
        )

        if error_values is None:
            train_error = val_error = test_error = None
        else:
            train_error = float(error_values[0])
            val_error = float(error_values[1])
            test_error = float(error_values[2])

        return {
            "success": True,
            "model_name": model_name,
            "epochs": epochs,
            "train_error": train_error,
            "val_error": val_error,
            "test_error": test_error,
            "model_saved": save_model,
            "model_path": model_path if save_model else None
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
335
-
336
-
337
@mcp.tool(name="predict_properties", description="Use a trained model to predict properties of new structures.")
def predict_properties(
    data_path: str,
    model_path: str,
    target_index: int = 0
) -> dict:
    """
    Run a saved model over a dataset directory and report the resulting error.

    Parameters:
        data_path (str): Path to directory containing structure files to predict.
        model_path (str): Path to the trained model file (.pth).
        target_index (int): Index of target column (default: 0).

    Returns:
        dict: ``success=True`` with ``dataset_size``, ``test_error`` and the
        name of the output CSV; or ``success=False`` with an ``error`` message.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        data_missing = not os.path.exists(data_path)
        if data_missing:
            return {"success": False, "error": f"Data path not found: {data_path}"}

        model_missing = not os.path.exists(model_path)
        if model_missing:
            return {"success": False, "error": f"Model file not found: {model_path}"}

        # Load the dataset without forcing a reprocess.
        dataset = process.get_dataset(data_path, target_index, "False")

        prediction_job = {
            "job_name": "mcp_predict_job",
            "model_path": model_path,
            "write_output": "True"
        }

        # Evaluate with L1 loss.
        test_error = training.predict(dataset, "l1_loss", prediction_job)

        summary = {
            "success": True,
            "dataset_size": len(dataset),
            "test_error": float(test_error),
            "output_file": "mcp_predict_job_predicted_outputs.csv"
        }
        return summary
    except Exception as e:
        return {"success": False, "error": str(e)}
384
-
385
-
386
@mcp.tool(name="cross_validation", description="Perform k-fold cross validation on a dataset.")
def cross_validation(
    data_path: str,
    model_name: str = "CGCNN_demo",
    cv_folds: int = 5,
    epochs: int = 100
) -> dict:
    """
    Perform k-fold cross validation on a dataset.

    Loads the model defaults from ``config.yml`` under ``project_root``,
    overrides the epoch count, and calls ``training.train_CV``.

    Parameters:
        data_path (str): Path to directory containing structure data.
        model_name (str): Name of the model to use (default: 'CGCNN_demo').
        cv_folds (int): Number of cross-validation folds (default: 5); must be at least 2.
        epochs (int): Number of training epochs per fold (default: 100).

    Returns:
        dict: ``success=True`` with ``cv_error`` (``None`` when the trainer
        returns ``None``) and the name of the output CSV; or ``success=False``
        with an ``error`` message.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # k-fold cross validation needs at least two folds to hold one out.
        if cv_folds < 2:
            return {"success": False, "error": f"cv_folds must be at least 2, got {cv_folds}"}

        # Load config
        config_path = os.path.join(project_root, "config.yml")
        if not os.path.exists(config_path):
            return {"success": False, "error": "Config file not found"}

        with open(config_path, "r") as f:
            # NOTE(review): FullLoader kept as-is for the project's own config;
            # use yaml.safe_load if this file can ever come from a user.
            # Empty documents parse to None, hence the `or {}`.
            config = yaml.load(f, Loader=yaml.FullLoader) or {}

        available_models = config.get("Models") or {}
        if model_name not in available_models:
            return {"success": False, "error": f"Model '{model_name}' not found"}

        job_config = {
            "job_name": "mcp_cv_job",
            "reprocess": "False",
            "model": model_name,
            "cv_folds": cv_folds,
            "write_output": "True",
            "parallel": "False",
            # Integer bounds and a plain Python int instead of a numpy scalar.
            "seed": int(np.random.randint(1, 1_000_000))
        }

        training_config = {
            "target_index": 0,
            "loss": "l1_loss",
            "verbosity": 5
        }

        # Copy so the epoch override never leaks back into the loaded config.
        model_config = available_models[model_name].copy()
        model_config["epochs"] = epochs

        world_size = torch.cuda.device_count()
        rank = "cpu" if world_size == 0 else "cuda"

        cv_error = training.train_CV(
            rank,
            world_size,
            data_path,
            job_config,
            training_config,
            model_config
        )

        return {
            "success": True,
            "model_name": model_name,
            "cv_folds": cv_folds,
            "cv_error": float(cv_error) if cv_error is not None else None,
            "output_file": "mcp_cv_job_CV_outputs.csv"
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
460
-
461
-
462
@mcp.tool(name="analyze_structure", description="Analyze the structure of atomic data and convert to graph representation info.")
def analyze_structure(structure_file: str) -> dict:
    """
    Analyze the structure of an atomic structure file.

    Reads the file with ``ase.io.read`` and summarises composition,
    periodicity and pairwise distances. Neighbors are counted within a fixed
    8.0 cutoff on the minimum-image distance matrix.

    Parameters:
        structure_file (str): Path to a structure file (json, cif, xyz, POSCAR, etc.).

    Returns:
        dict: ``success=True`` with atom count, formula, sorted element list,
        per-element counts, periodicity flags, cell (``None`` when no axis is
        periodic), neighbor statistics and min/max distance. ``min_distance``
        is ``None`` when no pair has a positive distance (e.g. a single
        atom). On failure, ``success=False`` with an ``error`` message.
    """
    try:
        if not os.path.exists(structure_file):
            return {"success": False, "error": f"Structure file not found: {structure_file}"}

        from ase import io as ase_io

        # Read structure
        structure = ase_io.read(structure_file)

        num_atoms = len(structure)
        if num_atoms == 0:
            # Every statistic below is undefined without atoms.
            return {"success": False, "error": "Structure contains no atoms"}

        # Get basic info
        symbols = structure.get_chemical_symbols()
        pbc = structure.pbc.tolist()
        cell = structure.get_cell().tolist() if any(pbc) else None

        # Sorted so repeated calls return elements in a stable order.
        elements = sorted(set(symbols))

        # Get distance matrix
        distance_matrix = structure.get_all_distances(mic=True)

        # Count, per atom, the other atoms strictly inside the cutoff;
        # the "> 0" test excludes the zero self-distance on the diagonal.
        cutoff_radius = 8.0
        positive = distance_matrix > 0
        neighbors_count = (positive & (distance_matrix < cutoff_radius)).sum(axis=1)

        # A lone atom has no positive distances; .min() on an empty array raises.
        positive_distances = distance_matrix[positive]
        min_distance = float(positive_distances.min()) if positive_distances.size else None

        return {
            "success": True,
            "num_atoms": num_atoms,
            "chemical_formula": structure.get_chemical_formula(),
            "elements": elements,
            "element_counts": {elem: symbols.count(elem) for elem in elements},
            "has_periodicity": any(pbc),
            "pbc": pbc,
            "cell": cell,
            "average_neighbors": float(neighbors_count.mean()),
            "min_neighbors": int(neighbors_count.min()),
            "max_neighbors": int(neighbors_count.max()),
            "min_distance": min_distance,
            "max_distance": float(distance_matrix.max())
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
516
-
517
-
518
@mcp.tool(name="compare_models", description="Compare performance of different GNN models on a dataset.")
def compare_models(
    data_path: str,
    model_list: Optional[List[str]] = None,
    epochs: int = 50
) -> dict:
    """
    Compare performance of different GNN models on a dataset.

    Trains each requested model through ``train_model`` (without saving it)
    and picks the one with the lowest test error.

    Parameters:
        data_path (str): Path to directory containing structure data.
        model_list (List[str], optional): Models to compare. Defaults to
            ``["CGCNN_demo", "GCN_demo", "SchNet_demo"]`` when ``None``.
        epochs (int): Number of training epochs per model (default: 50).

    Returns:
        dict: ``success=True`` with per-model ``results`` (errors, or an
        ``error`` message for models that failed), ``best_model`` and
        ``best_test_error`` (both ``None`` when no model produced a test
        error); or ``success=False`` with an ``error`` message.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        if model_list is None:
            model_list = ["CGCNN_demo", "GCN_demo", "SchNet_demo"]

        # NOTE(review): on some FastMCP releases @mcp.tool replaces the function
        # with a tool object that is not callable and keeps the original
        # function on `.fn` — confirm against the installed version. Falling
        # back to the name itself keeps this working where the decorator
        # returns the plain function.
        train_fn = getattr(train_model, "fn", train_model)

        results = {}

        for model_name in model_list:
            try:
                result = train_fn(
                    data_path=data_path,
                    model_name=model_name,
                    epochs=epochs,
                    save_model=False
                )

                if result["success"]:
                    results[model_name] = {
                        "train_error": result["train_error"],
                        "val_error": result["val_error"],
                        "test_error": result["test_error"]
                    }
                else:
                    results[model_name] = {"error": result["error"]}
            except Exception as e:
                # One failing model must not abort the whole comparison.
                results[model_name] = {"error": str(e)}

        # Find best model: lowest test error, first one wins on ties.
        best_model = None
        best_error = float("inf")
        for candidate, res in results.items():
            candidate_error = res.get("test_error")
            if candidate_error is not None and candidate_error < best_error:
                best_error = candidate_error
                best_model = candidate

        return {
            "success": True,
            "results": results,
            "best_model": best_model,
            "best_test_error": best_error if best_model is not None else None
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
584
-
585
-
586
@mcp.tool(name="get_dataset_info", description="Get information about a dataset directory.")
def get_dataset_info(data_path: str) -> dict:
    """
    Get information about a dataset directory.

    Parameters:
        data_path (str): Path to directory containing structure data.

    Returns:
        dict: ``success=True`` with a count of regular files per lower-cased
        extension, flags for ``targets.csv`` / ``atom_dict.json`` /
        ``processed``, the number of non-empty rows in ``targets.csv``
        (0 when absent) and ``ready_for_training`` (true when ``targets.csv``
        exists); or ``success=False`` with an ``error`` message.
    """
    try:
        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        if not os.path.isdir(data_path):
            return {"success": False, "error": f"Data path is not a directory: {data_path}"}

        # Count files by extension. Subdirectories (such as "processed") are
        # skipped so they are not reported as extensionless files.
        extensions = {}
        for entry in os.listdir(data_path):
            if not os.path.isfile(os.path.join(data_path, entry)):
                continue
            ext = os.path.splitext(entry)[1].lower()
            extensions[ext] = extensions.get(ext, 0) + 1

        # Check for required files
        targets_file = os.path.join(data_path, "targets.csv")
        has_targets = os.path.exists(targets_file)
        has_atom_dict = os.path.exists(os.path.join(data_path, "atom_dict.json"))
        has_processed = os.path.exists(os.path.join(data_path, "processed"))

        # Read targets if available
        num_samples = 0
        if has_targets:
            import csv
            # newline="" is what the csv module expects of file objects;
            # blank lines yield empty rows and are not counted as samples.
            with open(targets_file, newline="") as f:
                num_samples = sum(1 for row in csv.reader(f) if row)

        return {
            "success": True,
            "data_path": data_path,
            "file_extensions": extensions,
            "has_targets_csv": has_targets,
            "has_atom_dict": has_atom_dict,
            "has_processed_data": has_processed,
            "num_samples": num_samples,
            "ready_for_training": has_targets
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
631
-
632
-
633
def create_app() -> FastMCP:
    """
    Hand back the module-level FastMCP instance.

    Returns:
        FastMCP: The ``mcp`` object created at import time of this module.
    """
    app = mcp
    return app
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MatDeepLearn MCP Service
3
+ A Model Context Protocol service for materials property prediction using Graph Neural Networks.
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import json
9
+ import tempfile
10
+ import yaml
11
+ import numpy as np
12
+ from typing import Optional, List, Dict, Any
13
+ from pathlib import Path
14
+
15
+ # Add MatDeepLearn to path
16
+ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
17
+ if project_root not in sys.path:
18
+ sys.path.insert(0, project_root)
19
+
20
+ from fastmcp import FastMCP
21
+
22
+ # Import MatDeepLearn modules
23
+ try:
24
+ import torch
25
+ from matdeeplearn import models, process, training
26
+ from matdeeplearn.models.utils import model_summary
27
+ MATDEEPLEARN_AVAILABLE = True
28
+ except ImportError as e:
29
+ MATDEEPLEARN_AVAILABLE = False
30
+ IMPORT_ERROR = str(e)
31
+
32
+ mcp = FastMCP("matdeeplearn_service")
33
+
34
+
35
@mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
def check_environment() -> dict:
    """
    Report whether MatDeepLearn imported successfully and which GPUs torch can see.

    Returns:
        dict: ``success=True`` together with the torch version, GPU
        availability, GPU count, name of device 0 and the list of demo model
        names; or ``success=False`` with an ``error`` message when the
        MatDeepLearn import failed or any query raised.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            # IMPORT_ERROR is only bound when the guarded import at module load failed.
            return {
                "success": False,
                "error": f"MatDeepLearn not available: {IMPORT_ERROR}"
            }

        has_gpu = torch.cuda.is_available()
        if has_gpu:
            device_total = torch.cuda.device_count()
            first_device_name = torch.cuda.get_device_name(0)
        else:
            device_total = 0
            first_device_name = "N/A"

        demo_models = [
            "CGCNN_demo", "MPNN_demo", "SchNet_demo",
            "MEGNet_demo", "GCN_demo", "SOAP_demo", "SM_demo"
        ]

        report = {
            "success": True,
            "matdeeplearn_available": True,
            "torch_version": torch.__version__,
            "gpu_available": has_gpu,
            "gpu_count": device_total,
            "gpu_name": first_device_name,
            "available_models": demo_models
        }
        return report
    except Exception as e:
        return {"success": False, "error": str(e)}
68
+
69
+
70
@mcp.tool(name="list_available_models", description="List all available GNN models in MatDeepLearn.")
def list_available_models() -> dict:
    """
    Return the static catalog of demo models exposed by this service.

    Returns:
        dict: ``success=True``, a ``models`` mapping from model key to its
        ``name`` / ``description`` / ``paper`` entry, and ``total_models``.
    """
    try:
        # (key, full name, description, reference) — one row per model.
        catalog = (
            ("CGCNN_demo",
             "Crystal Graph Convolutional Neural Network",
             "A GNN for predicting material properties using crystal graphs.",
             "Xie & Grossman, PRL 2018"),
            ("MPNN_demo",
             "Message Passing Neural Network",
             "General message passing framework for molecular graphs.",
             "Gilmer et al., ICML 2017"),
            ("SchNet_demo",
             "SchNet",
             "Continuous-filter convolutional neural network for modeling quantum interactions.",
             "Schütt et al., JCP 2017"),
            ("MEGNet_demo",
             "MatErials Graph Network",
             "Graph network with global state for materials property prediction.",
             "Chen et al., Chem. Mater. 2019"),
            ("GCN_demo",
             "Graph Convolutional Network",
             "Standard graph convolutional network architecture.",
             "Kipf & Welling, ICLR 2017"),
            ("SOAP_demo",
             "Smooth Overlap of Atomic Positions",
             "Descriptor-based method using SOAP features.",
             "Bartók et al., PRB 2013"),
            ("SM_demo",
             "Sine Matrix",
             "Descriptor-based method using Sine/Coulomb matrix features.",
             "Various"),
        )

        models_info = {
            key: {"name": title, "description": summary, "paper": citation}
            for key, title, summary, citation in catalog
        }

        return {
            "success": True,
            "models": models_info,
            "total_models": len(models_info)
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
124
+
125
+
126
@mcp.tool(name="get_model_config", description="Get the default configuration for a specific model.")
def get_model_config(model_name: str) -> dict:
    """
    Get the default configuration for a specific GNN model.

    Reads ``config.yml`` from ``project_root`` and returns the entry for
    ``model_name`` from its ``Models`` section together with the
    ``Processing`` and ``Training`` sections.

    Parameters:
        model_name (str): Name of the model (e.g., 'CGCNN_demo', 'SchNet_demo').

    Returns:
        dict: ``success=True`` with ``model_name``, ``model_config``,
        ``processing_config`` and ``training_config``; or ``success=False``
        with an ``error`` message (missing config file, unknown model, or any
        exception raised while reading the file).
    """
    try:
        config_path = os.path.join(project_root, "config.yml")

        if not os.path.exists(config_path):
            return {"success": False, "error": "Config file not found"}

        with open(config_path, "r") as f:
            # NOTE(review): FullLoader is kept as-is; config.yml is a project
            # file here. Switch to yaml.safe_load if it can ever be user-supplied.
            # An empty YAML document parses to None, so fall back to {}.
            config = yaml.load(f, Loader=yaml.FullLoader) or {}

        # A "Models:" key with no value also parses to None.
        available_models = config.get("Models") or {}

        if model_name not in available_models:
            return {
                "success": False,
                "error": f"Model '{model_name}' not found. Available models: {list(available_models.keys())}"
            }

        return {
            "success": True,
            "model_name": model_name,
            "model_config": available_models[model_name],
            "processing_config": config.get("Processing", {}),
            "training_config": config.get("Training", {})
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
165
+
166
+
167
@mcp.tool(name="process_structure_data", description="Process atomic structure data into graph format for GNN training. Provide either data_path (server path) or structure_contents (direct file contents).")
def process_structure_data(
    data_path: Optional[str] = None,
    structure_contents: Optional[Dict[str, str]] = None,
    targets_csv: Optional[str] = None,
    target_index: int = 0,
    graph_max_radius: float = 8.0,
    graph_max_neighbors: int = 12,
    reprocess: bool = False
) -> dict:
    """
    Process atomic structure data into graph format.

    When ``structure_contents`` is given, the files and ``targets_csv`` are
    written into a fresh temporary directory, which then replaces
    ``data_path``. The directory is kept on success (its path is returned so
    later calls can use it) and removed if processing fails.

    Parameters:
        data_path (str, optional): Path to directory containing structure files (server-side).
        structure_contents (dict, optional): Dictionary mapping filenames to file contents.
            Filenames must be plain names without any directory component.
            Example: {"structure1.cif": "CIF content...", "structure2.cif": "..."}
        targets_csv (str, optional): Content of targets.csv file. Required with structure_contents.
        target_index (int): Index of target column in targets.csv (default: 0).
        graph_max_radius (float): Maximum radius for edges in graph (default: 8.0).
        graph_max_neighbors (int): Maximum number of neighbors per atom (default: 12).
        reprocess (bool): Whether to reprocess data even if processed files exist.

    Returns:
        dict: ``success=True`` with ``dataset_size``, ``sample_data``,
        ``data_path``, ``is_temporary`` and, for uploaded content,
        ``temp_data_path``; or ``success=False`` with an ``error`` message.

    Example usage with direct content:
        process_structure_data(
            structure_contents={"struct1.cif": "CIF content...", "struct2.cif": "..."},
            targets_csv="struct1,1.5\\nstruct2,2.3"
        )
    """
    import shutil

    # Bound before the try so the except handler can clean it up.
    temp_dir = None
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        # If structure_contents provided, create temp directory
        if structure_contents is not None:
            if targets_csv is None:
                return {"success": False, "error": "targets_csv is required when providing structure_contents"}

            # Filenames come from the client: reject anything that could
            # escape the temp directory (absolute paths, "..", separators)
            # before touching the disk.
            for filename in structure_contents:
                is_plain_name = (
                    isinstance(filename, str)
                    and filename not in ("", ".", "..")
                    and "\\" not in filename
                    and os.path.basename(filename) == filename
                )
                if not is_plain_name:
                    return {"success": False, "error": f"Invalid structure filename: {filename!r}"}

            # Create temporary directory with uploaded files
            temp_dir = tempfile.mkdtemp(prefix="mcp_data_")

            # Write structure files
            for filename, content in structure_contents.items():
                with open(os.path.join(temp_dir, filename), "w", encoding="utf-8") as f:
                    f.write(content)

            # Write targets.csv
            with open(os.path.join(temp_dir, "targets.csv"), "w", encoding="utf-8") as f:
                f.write(targets_csv)

            data_path = temp_dir

        if data_path is None:
            return {"success": False, "error": "Either data_path or structure_contents must be provided"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # NOTE(review): data_format is fixed to "json" while the docstring
        # example uploads .cif files — confirm which formats
        # process.get_dataset accepts under this setting.
        processing_args = {
            "dataset_type": "inmemory",
            "data_path": data_path,
            "target_path": "targets.csv",
            "dictionary_source": "default",
            "dictionary_path": "atom_dict.json",
            "data_format": "json",
            "verbose": "True",
            "graph_max_radius": graph_max_radius,
            "graph_max_neighbors": graph_max_neighbors,
            "voronoi": "False",
            "edge_features": "True",
            "graph_edge_length": 50,
            "SM_descriptor": "False",
            "SOAP_descriptor": "False"
        }

        dataset = process.get_dataset(
            data_path,
            target_index,
            "True" if reprocess else "False",
            processing_args
        )

        dataset_size = len(dataset)
        if dataset_size > 0:
            first = dataset[0]
            sample_data = {
                "num_nodes": first.x.shape[0],
                "num_node_features": first.x.shape[1],
                "num_edges": first.edge_index.shape[1]
            }
        else:
            sample_data = {"num_nodes": 0, "num_node_features": 0, "num_edges": 0}

        result = {
            "success": True,
            "dataset_size": dataset_size,
            "sample_data": sample_data,
            "data_path": data_path,
            "is_temporary": temp_dir is not None
        }

        # Note: Don't delete temp_dir on success, it may be needed for training
        if temp_dir:
            result["temp_data_path"] = temp_dir

        return result
    except Exception as e:
        # The caller never learns the temp path on failure, so remove it here
        # rather than leaking one directory per failed upload.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return {"success": False, "error": str(e)}
273
+
274
+
275
@mcp.tool(name="train_model", description="Train a GNN model on processed structure data.")
def train_model(
    data_path: str,
    model_name: str = "CGCNN_demo",
    epochs: int = 100,
    batch_size: int = 32,
    learning_rate: float = 0.002,
    train_ratio: float = 0.8,
    val_ratio: float = 0.1,
    test_ratio: float = 0.1,
    save_model: bool = True,
    model_path: str = "trained_model.pth"
) -> dict:
    """
    Train a GNN model on processed structure data.

    Parameters:
        data_path (str): Path to directory containing processed structure data.
        model_name (str): Name of the model to train (default: 'CGCNN_demo').
            Must be a key of the "Models" section of the project's config.yml.
        epochs (int): Number of training epochs (default: 100). Must be >= 1.
        batch_size (int): Training batch size (default: 32). Must be >= 1.
        learning_rate (float): Learning rate (default: 0.002). Must be > 0.
        train_ratio (float): Ratio of data for training (default: 0.8).
        val_ratio (float): Ratio of data for validation (default: 0.1).
        test_ratio (float): Ratio of data for testing (default: 0.1).
        save_model (bool): Whether to save the trained model (default: True).
        model_path (str): Path to save the trained model (default: 'trained_model.pth').

    Returns:
        dict: On success, "success" is True and the dict carries the model name,
            epoch count, train/val/test errors and the saved model path (None when
            save_model is False). On any failure, "success" is False and "error"
            holds a message; exceptions are never propagated to the caller.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # Validate hyperparameters up front so the caller gets a clear message
        # instead of an opaque failure from deep inside the training code.
        if epochs < 1 or batch_size < 1:
            return {"success": False, "error": "epochs and batch_size must both be >= 1"}
        if learning_rate <= 0:
            return {"success": False, "error": "learning_rate must be > 0"}

        ratios = (train_ratio, val_ratio, test_ratio)
        if train_ratio <= 0 or any(r < 0 or r > 1 for r in ratios):
            return {
                "success": False,
                "error": "train_ratio must be in (0, 1] and val_ratio/test_ratio in [0, 1]"
            }
        # Small tolerance so splits such as 0.7/0.2/0.1 are not rejected because
        # of floating-point rounding in the sum.
        if sum(ratios) > 1.0 + 1e-9:
            return {
                "success": False,
                "error": f"train_ratio + val_ratio + test_ratio must not exceed 1 (got {sum(ratios)})"
            }

        # Load default config
        config_path = os.path.join(project_root, "config.yml")
        with open(config_path, "r") as f:
            # NOTE(review): FullLoader is kept as in the original; config.yml is a
            # project-local file. Prefer yaml.safe_load if this path ever becomes
            # user-controlled.
            config = yaml.load(f, Loader=yaml.FullLoader) or {}

        # `or {}` also covers a config whose "Models" key is present but empty.
        available_models = config.get("Models") or {}
        if model_name not in available_models:
            return {"success": False, "error": f"Model '{model_name}' not found"}

        # Prepare configuration
        job_config = {
            "job_name": "mcp_train_job",
            "reprocess": "False",
            "model": model_name,
            "load_model": "False",
            "save_model": "True" if save_model else "False",
            "model_path": model_path,
            "write_output": "True",
            "parallel": "False",
            # Integer bounds (the original passed the float 1e6) and a plain
            # Python int as the resulting seed.
            "seed": int(np.random.randint(1, 10**6))
        }

        training_config = {
            "target_index": 0,
            "loss": "l1_loss",
            "train_ratio": train_ratio,
            "val_ratio": val_ratio,
            "test_ratio": test_ratio,
            "verbosity": 5
        }

        # Copy so the per-call overrides never touch the loaded config mapping.
        model_config = available_models[model_name].copy()
        model_config["epochs"] = epochs
        model_config["batch_size"] = batch_size
        model_config["lr"] = learning_rate

        # Determine device: use CUDA whenever at least one GPU is visible.
        world_size = torch.cuda.device_count()
        rank = "cpu" if world_size == 0 else "cuda"

        # Train model
        error_values = training.train_regular(
            rank,
            world_size,
            data_path,
            job_config,
            training_config,
            model_config
        )

        has_errors = error_values is not None
        return {
            "success": True,
            "model_name": model_name,
            "epochs": epochs,
            "train_error": float(error_values[0]) if has_errors else None,
            "val_error": float(error_values[1]) if has_errors else None,
            "test_error": float(error_values[2]) if has_errors else None,
            "model_saved": save_model,
            "model_path": model_path if save_model else None
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
377
+
378
+
379
@mcp.tool(name="predict_properties", description="Use a trained model to predict properties of new structures.")
def predict_properties(
    data_path: str,
    model_path: str,
    target_index: int = 0
) -> dict:
    """
    Run a previously trained model over a directory of structures.

    Parameters:
        data_path (str): Path to directory containing structure files to predict.
        model_path (str): Path to the trained model file (.pth).
        target_index (int): Index of target column (default: 0).

    Returns:
        dict: On success, the dataset size, the test error reported by the
            prediction run and the name of the output CSV. On failure,
            "success" is False and "error" describes the problem.
    """

    def _failed(message: str) -> dict:
        # Uniform error payload used by every failure path below.
        return {"success": False, "error": message}

    try:
        # Guard clauses: bail out before touching the dataset.
        if not MATDEEPLEARN_AVAILABLE:
            return _failed("MatDeepLearn not available")
        if not os.path.exists(data_path):
            return _failed(f"Data path not found: {data_path}")
        if not os.path.exists(model_path):
            return _failed(f"Model file not found: {model_path}")

        # Load the dataset without forcing a reprocess.
        structures = process.get_dataset(data_path, target_index, "False")

        prediction_job = dict(
            job_name="mcp_predict_job",
            model_path=model_path,
            write_output="True",
        )

        # Evaluate with the L1 loss.
        l1_error = training.predict(structures, "l1_loss", prediction_job)

        outcome = {"success": True}
        outcome["dataset_size"] = len(structures)
        outcome["test_error"] = float(l1_error)
        outcome["output_file"] = "mcp_predict_job_predicted_outputs.csv"
        return outcome
    except Exception as exc:
        return _failed(str(exc))
426
+
427
+
428
@mcp.tool(name="cross_validation", description="Perform k-fold cross validation on a dataset.")
def cross_validation(
    data_path: str,
    model_name: str = "CGCNN_demo",
    cv_folds: int = 5,
    epochs: int = 100
) -> dict:
    """
    Perform k-fold cross validation on a dataset.

    Parameters:
        data_path (str): Path to directory containing structure data.
        model_name (str): Name of the model to use (default: 'CGCNN_demo').
            Must be a key of the "Models" section of the project's config.yml.
        cv_folds (int): Number of cross-validation folds (default: 5). Must be >= 2.
        epochs (int): Number of training epochs per fold (default: 100). Must be >= 1.

    Returns:
        dict: On success, the model name, fold count, cross-validation error and
            the name of the output CSV. On failure, "success" is False and
            "error" holds a message; exceptions are never propagated.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # A single fold (or fewer) is not a cross-validation; reject early with
        # a clear message rather than failing inside the training code.
        if cv_folds < 2:
            return {"success": False, "error": f"cv_folds must be >= 2 (got {cv_folds})"}
        if epochs < 1:
            return {"success": False, "error": f"epochs must be >= 1 (got {epochs})"}

        # Load config
        config_path = os.path.join(project_root, "config.yml")
        with open(config_path, "r") as f:
            # NOTE(review): FullLoader kept as in the original; config.yml is a
            # project-local file. Prefer yaml.safe_load if that ever changes.
            config = yaml.load(f, Loader=yaml.FullLoader) or {}

        # `or {}` also covers a config whose "Models" key is present but empty.
        available_models = config.get("Models") or {}
        if model_name not in available_models:
            return {"success": False, "error": f"Model '{model_name}' not found"}

        job_config = {
            "job_name": "mcp_cv_job",
            "reprocess": "False",
            "model": model_name,
            "cv_folds": cv_folds,
            "write_output": "True",
            "parallel": "False",
            # Integer bounds (the original passed the float 1e6) and a plain
            # Python int as the resulting seed.
            "seed": int(np.random.randint(1, 10**6))
        }

        training_config = {
            "target_index": 0,
            "loss": "l1_loss",
            "verbosity": 5
        }

        # Copy so the epoch override never touches the loaded config mapping.
        model_config = available_models[model_name].copy()
        model_config["epochs"] = epochs

        # Use CUDA whenever at least one GPU is visible.
        world_size = torch.cuda.device_count()
        rank = "cpu" if world_size == 0 else "cuda"

        cv_error = training.train_CV(
            rank,
            world_size,
            data_path,
            job_config,
            training_config,
            model_config
        )

        return {
            "success": True,
            "model_name": model_name,
            "cv_folds": cv_folds,
            "cv_error": float(cv_error) if cv_error is not None else None,
            "output_file": "mcp_cv_job_CV_outputs.csv"
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
502
+
503
+
504
@mcp.tool(name="analyze_structure", description="Analyze atomic structure data. You can pass file content directly (for CIF, XYZ, POSCAR formats) or a file path on the server.")
def analyze_structure(
    file_content: Optional[str] = None,
    file_format: Optional[str] = None,
    structure_file: Optional[str] = None
) -> dict:
    """
    Analyze the structure of an atomic structure.

    Parameters:
        file_content (str, optional): The content of the structure file (CIF, XYZ, POSCAR, JSON format).
                                      Pass the actual file content directly here.
        file_format (str, optional): Format of the file content ('cif', 'xyz', 'vasp', 'json').
                                     Required when file_content is provided.
        structure_file (str, optional): Path to a structure file on the server (legacy option).

    Returns:
        dict: Contains structure analysis: atom count, formula, elements (sorted),
            per-element counts, periodicity, cell, up to the first 10 positions,
            and neighbor/distance statistics within an 8.0 cutoff. "min_distance"
            is None when there is no pair of atoms at non-zero distance (e.g. a
            single-atom structure). On failure, "success" is False and "error"
            holds a message.

    Example usage:
        analyze_structure(file_content="your CIF file content here...", file_format="cif")
    """
    try:
        from collections import Counter

        import ase.io

        structure = None

        # Method 1: Direct file content (preferred for remote access)
        if file_content is not None:
            if file_format is None:
                return {"success": False, "error": "file_format is required when providing file_content. Use 'cif', 'xyz', 'vasp', or 'json'."}

            # Map common format names
            format_map = {
                'cif': 'cif',
                'xyz': 'xyz',
                'vasp': 'vasp',
                'poscar': 'vasp',
                'json': 'json',
                'extxyz': 'extxyz'
            }

            fmt = format_map.get(file_format.lower())
            if fmt is None:
                return {"success": False, "error": f"Unsupported format: {file_format}. Supported: cif, xyz, vasp, poscar, json, extxyz"}

            # The content is written to a temporary file and read back from
            # that path. The path is recorded before writing so the file is
            # removed even if the write itself fails.
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(mode='w', suffix=f'.{fmt}', delete=False) as tmp:
                    tmp_path = tmp.name
                    tmp.write(file_content)
                structure = ase.io.read(tmp_path, format=fmt)
            finally:
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.unlink(tmp_path)  # Clean up temp file

        # Method 2: File path on server (legacy)
        elif structure_file is not None:
            if not os.path.exists(structure_file):
                return {"success": False, "error": f"Structure file not found: {structure_file}. Tip: For remote MCP, pass file_content directly instead of file path."}
            structure = ase.io.read(structure_file)

        else:
            return {"success": False, "error": "Either file_content (with file_format) or structure_file must be provided."}

        # Get basic info
        symbols = structure.get_chemical_symbols()
        positions = structure.get_positions().tolist()
        pbc = structure.pbc.tolist()
        is_periodic = any(pbc)
        cell = structure.get_cell().tolist() if is_periodic else None

        # Get distance matrix
        distance_matrix = structure.get_all_distances(mic=True)

        # Analyze connectivity: neighbors are atoms at non-zero distance
        # strictly inside the cutoff (zero distance excludes the atom itself).
        cutoff_radius = 8.0
        within_cutoff = (distance_matrix > 0) & (distance_matrix < cutoff_radius)
        neighbors_count = [int(n) for n in within_cutoff.sum(axis=1)]

        # A single-atom structure has no non-zero distances; calling .min() on
        # the empty selection would raise, so report None in that case.
        pair_distances = distance_matrix[distance_matrix > 0]

        return {
            "success": True,
            "num_atoms": len(structure),
            "chemical_formula": structure.get_chemical_formula(),
            # Sorted for a deterministic order (a bare set has none).
            "elements": sorted(set(symbols)),
            "element_counts": dict(Counter(symbols)),
            "has_periodicity": is_periodic,
            "pbc": pbc,
            "cell": cell,
            "positions": positions[:10],  # First 10 positions
            "average_neighbors": float(np.mean(neighbors_count)) if neighbors_count else 0.0,
            "min_neighbors": min(neighbors_count, default=0),
            "max_neighbors": max(neighbors_count, default=0),
            "min_distance": float(pair_distances.min()) if pair_distances.size else None,
            "max_distance": float(distance_matrix.max()) if distance_matrix.size else None
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
605
+
606
+
607
@mcp.tool(name="compare_models", description="Compare performance of different GNN models on a dataset.")
def compare_models(
    data_path: str,
    model_list: Optional[List[str]] = None,
    epochs: int = 50
) -> dict:
    """
    Compare performance of different GNN models on a dataset.

    Each model is trained once via train_model (without saving the model) and
    the model with the lowest test error is reported as the best one.

    Parameters:
        data_path (str): Path to directory containing structure data.
        model_list (List[str], optional): List of models to compare
            (default: "CGCNN_demo", "GCN_demo" and "SchNet_demo").
        epochs (int): Number of training epochs per model (default: 50).

    Returns:
        dict: "results" maps each model name to its train/val/test errors or to
            an "error" message; "best_model" / "best_test_error" identify the
            lowest test error, or are None when no model produced one.
    """
    try:
        if not MATDEEPLEARN_AVAILABLE:
            return {"success": False, "error": "MatDeepLearn not available"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        if model_list is None:
            model_list = ["CGCNN_demo", "GCN_demo", "SchNet_demo"]

        # Depending on the FastMCP version, `@mcp.tool` may replace the
        # decorated function with a tool object that is not directly callable
        # and exposes the original function as `.fn`. Fall back to the name
        # itself when it is still a plain function.
        train_fn = getattr(train_model, "fn", train_model)

        results = {}

        for model_name in model_list:
            try:
                result = train_fn(
                    data_path=data_path,
                    model_name=model_name,
                    epochs=epochs,
                    save_model=False
                )

                if result["success"]:
                    results[model_name] = {
                        "train_error": result["train_error"],
                        "val_error": result["val_error"],
                        "test_error": result["test_error"]
                    }
                else:
                    results[model_name] = {"error": result["error"]}
            except Exception as e:
                # One failing model must not abort the whole comparison.
                results[model_name] = {"error": str(e)}

        # Find best model. Only usable scores compete: the `< inf` test drops
        # infinite and NaN errors, matching a strict "better than inf" rule.
        scored = {
            name: res["test_error"]
            for name, res in results.items()
            if res.get("test_error") is not None and res["test_error"] < float("inf")
        }
        # min() keeps the first model on ties, i.e. the earliest in model_list.
        best_model = min(scored, key=scored.get) if scored else None

        return {
            "success": True,
            "results": results,
            "best_model": best_model,
            "best_test_error": scored[best_model] if best_model else None
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
673
+
674
+
675
@mcp.tool(name="get_dataset_info", description="Get information about a dataset directory or uploaded dataset.")
def get_dataset_info(
    data_path: Optional[str] = None,
    structure_files: Optional[List[str]] = None,
    targets_csv_content: Optional[str] = None
) -> dict:
    """
    Get information about a dataset.

    When targets_csv_content is given it takes precedence and data_path is
    ignored; otherwise the directory at data_path is inspected.

    Parameters:
        data_path (str, optional): Path to directory containing structure data (server-side).
        structure_files (List[str], optional): List of structure filenames (for validation check).
            Only used together with targets_csv_content.
        targets_csv_content (str, optional): Content of targets.csv file to analyze.
            The second column of each row is read as the target value; rows
            whose second column is not numeric are skipped for the statistics.

    Returns:
        dict: Contains dataset information including file counts and formats.
            "num_samples" counts non-blank CSV rows and "ready_for_training"
            is True only when targets are present and non-empty. On failure,
            "success" is False and "error" holds a message.
    """
    try:
        import csv
        from io import StringIO

        def _non_blank_rows(reader):
            # Blank lines (e.g. a trailing newline block) are not samples.
            return [row for row in reader if any(cell.strip() for cell in row)]

        # If analyzing uploaded content
        if targets_csv_content is not None:
            rows = _non_blank_rows(csv.reader(StringIO(targets_csv_content)))
            num_samples = len(rows)

            # Parse target values from the second column; non-numeric entries
            # are skipped on purpose, but only conversion errors are swallowed.
            target_values = []
            for row in rows:
                if len(row) >= 2:
                    try:
                        target_values.append(float(row[1]))
                    except ValueError:
                        continue

            result = {
                "success": True,
                "source": "uploaded_content",
                "num_samples": num_samples,
                "has_targets_csv": True,
                # An empty target list cannot be trained on.
                "ready_for_training": num_samples > 0
            }

            if target_values:
                result["target_statistics"] = {
                    "min": min(target_values),
                    "max": max(target_values),
                    "mean": sum(target_values) / len(target_values)
                }

            if structure_files:
                extensions = {}
                for name in structure_files:
                    ext = os.path.splitext(name)[1].lower()
                    extensions[ext] = extensions.get(ext, 0) + 1
                result["file_extensions"] = extensions
                result["num_structure_files"] = len(structure_files)

            return result

        # Traditional path-based analysis
        if data_path is None:
            return {"success": False, "error": "Either data_path or targets_csv_content must be provided"}

        if not os.path.exists(data_path):
            return {"success": False, "error": f"Data path not found: {data_path}"}

        # Count directory entries by extension
        extensions = {}
        for entry in os.listdir(data_path):
            ext = os.path.splitext(entry)[1].lower()
            extensions[ext] = extensions.get(ext, 0) + 1

        # Check for required files
        targets_file = os.path.join(data_path, "targets.csv")
        has_targets = os.path.exists(targets_file)
        has_atom_dict = os.path.exists(os.path.join(data_path, "atom_dict.json"))
        has_processed = os.path.exists(os.path.join(data_path, "processed"))

        # Read targets if available
        num_samples = 0
        if has_targets:
            # newline="" is what the csv module documents for file objects.
            with open(targets_file, newline="") as f:
                num_samples = len(_non_blank_rows(csv.reader(f)))

        return {
            "success": True,
            "source": "server_path",
            "data_path": data_path,
            "file_extensions": extensions,
            "has_targets_csv": has_targets,
            "has_atom_dict": has_atom_dict,
            "has_processed_data": has_processed,
            "num_samples": num_samples,
            # Same rule as the uploaded-content branch: targets must be non-empty.
            "ready_for_training": has_targets and num_samples > 0
        }
    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
774
+
775
+
776
@mcp.tool(name="quick_structure_analysis", description="Quick analysis of a structure file content without needing a server path. Ideal for analyzing uploaded files from Cursor.")
def quick_structure_analysis(
    file_content: str,
    file_format: str,
    include_positions: bool = False,
    include_distances: bool = True
) -> dict:
    """
    Perform quick analysis on structure file content uploaded directly.
    This is the recommended tool for analyzing structures when using remote MCP.

    Parameters:
        file_content (str): The complete content of the structure file.
        file_format (str): Format of the file - 'cif', 'xyz', 'vasp'/'poscar', 'json', 'extxyz'.
        include_positions (bool): Whether to include atomic positions in output (default: False).
        include_distances (bool): Whether to include distance analysis (default: True).

    Returns:
        dict: Comprehensive structure analysis. In "distance_analysis",
            "min_distance" is None when there is no pair of atoms at non-zero
            distance (e.g. a single-atom structure), so such inputs still reach
            the "gnn_suitable" verdict instead of failing. On failure,
            "success" is False and "error" holds a message.

    Example:
        quick_structure_analysis(
            file_content="data_NaCl\\n_cell_length_a 5.64...",
            file_format="cif"
        )
    """
    try:
        from collections import Counter

        import ase.io

        # Map format names
        format_map = {
            'cif': 'cif',
            'xyz': 'xyz',
            'vasp': 'vasp',
            'poscar': 'vasp',
            'json': 'json',
            'extxyz': 'extxyz'
        }

        fmt = format_map.get(file_format.lower())
        if fmt is None:
            return {
                "success": False,
                "error": f"Unsupported format: {file_format}. Supported: cif, xyz, vasp, poscar, json, extxyz"
            }

        # Write to temp file and read. The path is recorded before writing so
        # the file is removed even if the write itself fails.
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(mode='w', suffix=f'.{fmt}', delete=False) as tmp:
                tmp_path = tmp.name
                tmp.write(file_content)
            structure = ase.io.read(tmp_path, format=fmt)
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.unlink(tmp_path)

        # Basic analysis
        symbols = structure.get_chemical_symbols()
        pbc = structure.pbc.tolist()
        is_periodic = any(pbc)
        cell = structure.get_cell().tolist() if is_periodic else None
        num_atoms = len(structure)

        result = {
            "success": True,
            "num_atoms": num_atoms,
            "chemical_formula": structure.get_chemical_formula(),
            "reduced_formula": structure.get_chemical_formula(mode='reduce'),
            "elements": sorted(set(symbols)),
            "element_counts": dict(Counter(symbols)),
            "has_periodicity": is_periodic,
            "pbc": pbc,
            "cell_parameters": cell,
            "volume": float(structure.get_volume()) if is_periodic else None,
        }

        if include_positions:
            result["positions"] = structure.get_positions().tolist()
            result["symbols"] = symbols

        if include_distances:
            distance_matrix = structure.get_all_distances(mic=True)
            cutoff_radius = 8.0
            # Neighbors are atoms at non-zero distance strictly inside the
            # cutoff (zero distance excludes the atom itself).
            within_cutoff = (distance_matrix > 0) & (distance_matrix < cutoff_radius)
            neighbors_count = [int(n) for n in within_cutoff.sum(axis=1)]

            # With a single atom there are no non-zero distances; .min() on
            # the empty selection would raise, so report None in that case.
            pair_distances = distance_matrix[distance_matrix > 0]

            result["distance_analysis"] = {
                "cutoff_radius": cutoff_radius,
                "average_neighbors": float(np.mean(neighbors_count)) if neighbors_count else 0.0,
                "min_neighbors": min(neighbors_count, default=0),
                "max_neighbors": max(neighbors_count, default=0),
                "min_distance": float(pair_distances.min()) if pair_distances.size else None,
                "max_distance": float(distance_matrix.max()) if distance_matrix.size else None
            }

        # Check if suitable for GNN
        enough_atoms = num_atoms >= 2
        result["gnn_suitable"] = {
            "has_enough_atoms": enough_atoms,
            "has_3d_coordinates": True,
            "is_periodic": is_periodic,
            "recommendation": "Suitable for GNN training" if enough_atoms else "Too few atoms"
        }

        return result

    except Exception as e:
        # Tool boundary: report the failure to the MCP client instead of raising.
        return {"success": False, "error": str(e)}
885
+
886
+
887
def create_app() -> FastMCP:
    """
    Application factory for the MCP service.

    Returns:
        FastMCP: The module-level FastMCP instance on which this file's tools
            are registered; no new instance is created.
    """
    app = mcp
    return app