Yuan (Cyrus) Chiang committed
Commit aadf5d0
Parent: b7a7786

Clean up `eos_alloy` (#36)


* refactor input and flow

* move notebook from src to examples

* change gitignore

.gitignore CHANGED
@@ -2,7 +2,8 @@
 *.ipynb
 *.extxyz
 *.traj
-mlip_arena/tasks/*/*
+mlip_arena/tasks/*/
+examples/
 lab/
 manuscripts/
 
mlip_arena/tasks/eos_alloy/run.ipynb → examples/eos_alloy/run_Fe-Ni-Cr.ipynb RENAMED
The diff for this file is too large to render. See raw diff
 
mlip_arena/tasks/eos_alloy/flow.py CHANGED
@@ -1,16 +1,12 @@
 from functools import partial
 from pathlib import Path
-import json
 
 import pandas as pd
-from dask.distributed import Client
-from dask_jobqueue import SLURMCluster
 from huggingface_hub import hf_hub_download
 from prefect import Task, flow, task
 from prefect.client.schemas.objects import TaskRun
 from prefect.futures import wait
-from prefect.states import State, Failed
-from prefect_dask import DaskTaskRunner
+from prefect.states import State
 
 from ase.db import connect
 from mlip_arena.data.local import SafeHDFStore
@@ -47,7 +43,7 @@ def save_to_hdf(
 
     if not isinstance(result, dict):
         return
-
+
     try:
         atoms = result["atoms"]
         calculator_name = (
@@ -78,8 +74,7 @@ def save_to_hdf(
     family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
     family_path.mkdir(parents=True, exist_ok=True)
 
-    with open(family_path / f"{calculator_name}_{formula}.json", "w") as f:
-        json.dump(result, f, indent=2)
+    df.to_json(family_path / f"{calculator_name}_{formula}.json", indent=2)
 
     with SafeHDFStore(fpath, mode="a") as store:
         store.append(
@@ -131,6 +126,7 @@ def run_from_db(
             criterion=criterion,
             max_abs_strain=max_abs_strain,
             concurrent=concurrent,
+            cache_opt=False,
         )
         futures.append(future)
 
@@ -141,45 +137,3 @@ def run_from_db(
         for f in futures
         if f.state.is_completed()
     ]
-
-
-if __name__ == "__main__":
-    nodes_per_alloc = 1
-    gpus_per_alloc = 4
-    ntasks = 1
-
-    cluster_kwargs = dict(
-        cores=1,
-        memory="64 GB",
-        shebang="#!/bin/bash",
-        account="m3828",
-        walltime="00:30:00",
-        job_mem="0",
-        job_script_prologue=[
-            "source ~/.bashrc",
-            "module load python",
-            "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
-        ],
-        job_directives_skip=["-n", "--cpus-per-task", "-J"],
-        job_extra_directives=[
-            "-J eos",
-            "-q debug",
-            f"-N {nodes_per_alloc}",
-            "-C gpu",
-            f"-G {gpus_per_alloc}",
-        ],
-    )
-
-    cluster = SLURMCluster(**cluster_kwargs)
-    print(cluster.job_script())
-    cluster.adapt(minimum_jobs=2, maximum_jobs=2)
-    client = Client(cluster)
-
-    run_from_db_ = run_from_db.with_options(
-        task_runner=DaskTaskRunner(address=client.scheduler.address),
-        log_prints=True,
-    )
-
-    results = run_from_db_(
-        db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr"
-    )
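
Note: the SLURM/Dask launcher deleted above no longer ships inside flow.py; per the commit message, the example notebook now lives under examples/. For reference, a minimal external launcher assembled from the removed __main__ block might look like the sketch below — account, environment, and queue settings are site-specific placeholders, not library defaults:

# Reference sketch only, mirroring the __main__ block removed from flow.py.
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner

from mlip_arena.tasks.eos_alloy.flow import run_from_db

cluster = SLURMCluster(
    cores=1,
    memory="64 GB",
    walltime="00:30:00",
    job_extra_directives=["-q debug", "-N 1", "-C gpu", "-G 4"],  # placeholder queue/GPU flags
)
cluster.adapt(minimum_jobs=2, maximum_jobs=2)  # keep two SLURM allocations alive
client = Client(cluster)

# Attach the Prefect flow to the Dask scheduler and sweep the SQS database.
results = run_from_db.with_options(
    task_runner=DaskTaskRunner(address=client.scheduler.address),
    log_prints=True,
)(db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr")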
mlip_arena/tasks/eos_alloy/input.py CHANGED
@@ -22,17 +22,75 @@ Authors
 
 import os
 from pathlib import Path
+from typing import Generator, Iterable
 
 import numpy as np
-from dotenv import load_dotenv
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, hf_hub_download
+from prefect import task
 from tqdm.auto import tqdm
 
 from ase import Atoms
-from ase.build import bulk
 from ase.db import connect
 
 
+def save_to_db(
+    atoms_list: list[Atoms] | Iterable[Atoms] | Atoms,
+    db_path: Path | str,
+    upload: bool = True,
+    hf_token: str | None = os.getenv("HF_TOKEN", None),
+    repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
+    subfolder: str = Path(__file__).parent.name,
+):
+    """Save ASE Atoms objects to an ASE database and optionally upload to Hugging Face Hub."""
+
+    if upload and hf_token is None:
+        raise ValueError("HF_TOKEN is required to upload the database.")
+
+    db_path = Path(db_path)
+
+    if isinstance(atoms_list, Atoms):
+        atoms_list = [atoms_list]
+
+    with connect(db_path) as db:
+        for atoms in atoms_list:
+            if not isinstance(atoms, Atoms):
+                raise ValueError("atoms_list must contain ASE Atoms objects.")
+            db.write(atoms)
+
+    if upload:
+        api = HfApi(token=hf_token)
+        api.upload_file(
+            path_or_fileobj=db_path,
+            path_in_repo=f"{subfolder}/{db_path.name}",
+            repo_id=repo_id,
+            repo_type=repo_type,
+        )
+        print(f"{db_path.name} uploaded to {repo_id}/{subfolder}")
+
+    return db_path
+
+@task
+def get_atoms_from_db(
+    db_path: Path | str,
+    repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
+    subfolder: str = Path(__file__).parent.name,
+) -> Generator[Atoms, None, None]:
+    """Retrieve ASE Atoms objects from an ASE database."""
+    db_path = Path(db_path)
+    if not db_path.exists():
+        db_path = hf_hub_download(
+            repo_id=repo_id,
+            repo_type=repo_type,
+            subfolder=subfolder,
+            filename=str(db_path),
+        )
+    with connect(db_path) as db:
+        for row in db.select():
+            yield row.toatoms()
+
+
 def body_order(n=32, b=5):
     """
     Generate all possible combinations of atomic counts for `b` species
@@ -69,17 +127,16 @@ def get_endmember(structure, conc_lst, elements):
 def generate_alloy_db(
     structure_template: Atoms,
     elements: list[str],
-    local_path: Path | None = None,
+    db_path: Path | str,
     upload: bool = True,
+    hf_token: str | None = os.getenv("HF_TOKEN", None),
     repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
 ) -> Path:
-    # Load Hugging Face API token
-    load_dotenv()
-    hf_token = os.getenv("HF_TOKEN", None)
-
+
     if upload and hf_token is None:
-        raise ValueError("HF_TOKEN environment variable not set.")
-
+        raise ValueError("HF_TOKEN is required to upload the database.")
+
     num_atoms = len(structure_template)
     num_species = len(elements)
 
@@ -88,45 +145,35 @@ def generate_alloy_db(
 
     # Prepare the database
     db_path = (
-        local_path or Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
+        Path(db_path) or Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
    )
     db_path.unlink(missing_ok=True)
 
-    # Generate and save structures
-    with connect(db_path) as db:
-        for i, composition in tqdm(
-            enumerate(configurations), total=len(configurations)
-        ):
-            # Skip trivial cases where only one element is present
-            if sum(composition == 0) != len(elements) - 1:
-                atoms = generate_sqs(
-                    structure_template=structure_template,
-                    elements=np.array(elements)[composition != 0],
-                    counts=composition[composition != 0],
-                )
-            else:
-                atoms = get_endmember(
-                    structure=structure_template.copy(),
-                    conc_lst=composition,
-                    elements=elements,
-                )
-            db.write(atoms)
-
-    # Upload the database to Hugging Face Hub
-    if upload:
-        api = HfApi(token=hf_token)
-        api.upload_file(
-            path_or_fileobj=db_path,
-            path_in_repo=f"{Path(__file__).parent.name}/{db_path.name}",
-            repo_id=repo_id,
-            repo_type="dataset",
-        )
-        print(f"Database uploaded: {db_path}")
-
-    return db_path
-
-
-if __name__ == "__main__":
-    structure_template = bulk("Al", a=3.6, cubic=True).repeat([2, 2, 2])
-    elements = ["Fe", "Ni", "Cr"]
-    generate_alloy_db(structure_template, elements, upload=True)
+    atoms_list = []
+    for i, composition in tqdm(
+        enumerate(configurations), total=len(configurations)
+    ):
+        # Skip trivial cases where only one element is present
+        if sum(composition == 0) != len(elements) - 1:
+            atoms = generate_sqs(
+                structure_template=structure_template,
+                elements=np.array(elements)[composition != 0],
+                counts=composition[composition != 0],
+            )
+        else:
+            atoms = get_endmember(
+                structure=structure_template.copy(),
+                conc_lst=composition,
+                elements=elements,
+            )
+        atoms_list.append(atoms)
+
+
+    return save_to_db(
+        atoms_list=atoms_list,
+        db_path=db_path,
+        upload=upload,
+        hf_token=hf_token,
+        repo_id=repo_id,
+        repo_type=repo_type,
+    )
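
Note: with this refactor, database generation and retrieval become reusable helpers rather than a module-level script. A short usage sketch based on the signatures above — the bulk("Al", ...) template and Fe/Ni/Cr elements mirror the __main__ example removed from input.py, and the filename matches the one the flow consumes; treat it as illustrative rather than canonical:

from ase.build import bulk

from mlip_arena.tasks.eos_alloy.input import generate_alloy_db, get_atoms_from_db

# Build SQS structures for all Fe-Ni-Cr compositions and write them to an
# ASE database, optionally uploading it to the Hub (HF_TOKEN must be set in
# the environment when upload=True).
structure_template = bulk("Al", a=3.6, cubic=True).repeat([2, 2, 2])
db_path = generate_alloy_db(
    structure_template=structure_template,
    elements=["Fe", "Ni", "Cr"],
    db_path="sqs_Fe-Ni-Cr.db",
    upload=True,
)

# get_atoms_from_db is a Prefect @task: call it inside a flow, or use .fn()
# to invoke the underlying generator directly. It falls back to
# hf_hub_download when the file is absent locally.
for atoms in get_atoms_from_db.fn("sqs_Fe-Ni-Cr.db"):
    print(atoms.get_chemical_formula())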