Yuan (Cyrus) Chiang committed · aadf5d0
Parent: b7a7786
Clean up `eos_alloy` (#36)
* refactor input and flow
* move notebook from src to examples
* change gitignore
.gitignore CHANGED

@@ -2,7 +2,8 @@
 *.ipynb
 *.extxyz
 *.traj
-mlip_arena/tasks
+mlip_arena/tasks/*/
+examples/
 lab/
 manuscripts/
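A note on the new patterns: a bare `mlip_arena/tasks` entry matches the `tasks` directory itself, so git ignored every new file anywhere beneath it, while `mlip_arena/tasks/*/` matches only directories one level below `tasks/`. New files placed directly under `tasks/` therefore stay visible, and files already tracked by git are unaffected by either rule. An annotated sketch (the output filename is a hypothetical example):

mlip_arena/tasks/*/   # ignores new files in per-task subfolders, e.g. mlip_arena/tasks/eos_alloy/eos.h5
examples/             # new files under examples/ are not tracked by default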
mlip_arena/tasks/eos_alloy/run.ipynb → examples/eos_alloy/run_Fe-Ni-Cr.ipynb RENAMED

The diff for this file is too large to render. See raw diff.
mlip_arena/tasks/eos_alloy/flow.py CHANGED

@@ -1,16 +1,12 @@
 from functools import partial
 from pathlib import Path
-import json

 import pandas as pd
-from dask.distributed import Client
-from dask_jobqueue import SLURMCluster
 from huggingface_hub import hf_hub_download
 from prefect import Task, flow, task
 from prefect.client.schemas.objects import TaskRun
 from prefect.futures import wait
-from prefect.states import State
-from prefect_dask import DaskTaskRunner
+from prefect.states import State

 from ase.db import connect
 from mlip_arena.data.local import SafeHDFStore
@@ -47,7 +43,7 @@ def save_to_hdf(

     if not isinstance(result, dict):
         return
-
+
     try:
         atoms = result["atoms"]
         calculator_name = (
@@ -78,8 +74,7 @@ def save_to_hdf(
     family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
     family_path.mkdir(parents=True, exist_ok=True)

-    with open(..., "w") as f:  # opening line not rendered in the source view
-        json.dump(result, f, indent=2)
+    df.to_json(family_path / f"{calculator_name}_{formula}.json", indent=2)

     with SafeHDFStore(fpath, mode="a") as store:
         store.append(
@@ -131,6 +126,7 @@ def run_from_db(
             criterion=criterion,
             max_abs_strain=max_abs_strain,
             concurrent=concurrent,
+            cache_opt=False,
         )
         futures.append(future)

@@ -141,45 +137,3 @@
         for f in futures
         if f.state.is_completed()
     ]
-
-
-if __name__ == "__main__":
-    nodes_per_alloc = 1
-    gpus_per_alloc = 4
-    ntasks = 1
-
-    cluster_kwargs = dict(
-        cores=1,
-        memory="64 GB",
-        shebang="#!/bin/bash",
-        account="m3828",
-        walltime="00:30:00",
-        job_mem="0",
-        job_script_prologue=[
-            "source ~/.bashrc",
-            "module load python",
-            "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
-        ],
-        job_directives_skip=["-n", "--cpus-per-task", "-J"],
-        job_extra_directives=[
-            "-J eos",
-            "-q debug",
-            f"-N {nodes_per_alloc}",
-            "-C gpu",
-            f"-G {gpus_per_alloc}",
-        ],
-    )
-
-    cluster = SLURMCluster(**cluster_kwargs)
-    print(cluster.job_script())
-    cluster.adapt(minimum_jobs=2, maximum_jobs=2)
-    client = Client(cluster)
-
-    run_from_db_ = run_from_db.with_options(
-        task_runner=DaskTaskRunner(address=client.scheduler.address),
-        log_prints=True,
-    )
-
-    results = run_from_db_(
-        db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr"
-    )
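The removed `__main__` block is the part most readers will want back: it showed how to drive `run_from_db` on SLURM through Dask. After this commit that launcher lives outside `flow.py` (e.g. in the relocated example notebook). Below is a minimal driver sketch condensed from the removed block; the account, queue, and adaptive-scaling values are the original NERSC-specific settings and should be treated as placeholders.

from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner

from mlip_arena.tasks.eos_alloy.flow import run_from_db

# Adaptive SLURM-backed Dask cluster (condensed from the removed block;
# account/queue values below are site-specific placeholders).
cluster = SLURMCluster(
    cores=1,
    memory="64 GB",
    account="m3828",
    walltime="00:30:00",
    job_extra_directives=["-q debug", "-N 1", "-C gpu", "-G 4"],
)
cluster.adapt(minimum_jobs=2, maximum_jobs=2)
client = Client(cluster)

# Run the Prefect flow with its tasks dispatched to the Dask scheduler.
results = run_from_db.with_options(
    task_runner=DaskTaskRunner(address=client.scheduler.address),
    log_prints=True,
)(db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr")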
mlip_arena/tasks/eos_alloy/input.py CHANGED

@@ -22,17 +22,75 @@ Authors

 import os
 from pathlib import Path
+from typing import Generator, Iterable

 import numpy as np
-from dotenv import load_dotenv
-from ...  # second removed import not rendered in the source view
+from huggingface_hub import HfApi, hf_hub_download
+from prefect import task
 from tqdm.auto import tqdm

 from ase import Atoms
-from ase.build import bulk
 from ase.db import connect


+def save_to_db(
+    atoms_list: list[Atoms] | Iterable[Atoms] | Atoms,
+    db_path: Path | str,
+    upload: bool = True,
+    hf_token: str | None = os.getenv("HF_TOKEN", None),
+    repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
+    subfolder: str = Path(__file__).parent.name,
+):
+    """Save ASE Atoms objects to an ASE database and optionally upload to Hugging Face Hub."""
+
+    if upload and hf_token is None:
+        raise ValueError("HF_TOKEN is required to upload the database.")
+
+    db_path = Path(db_path)
+
+    if isinstance(atoms_list, Atoms):
+        atoms_list = [atoms_list]
+
+    with connect(db_path) as db:
+        for atoms in atoms_list:
+            if not isinstance(atoms, Atoms):
+                raise ValueError("atoms_list must contain ASE Atoms objects.")
+            db.write(atoms)
+
+    if upload:
+        api = HfApi(token=hf_token)
+        api.upload_file(
+            path_or_fileobj=db_path,
+            path_in_repo=f"{subfolder}/{db_path.name}",
+            repo_id=repo_id,
+            repo_type=repo_type,
+        )
+        print(f"{db_path.name} uploaded to {repo_id}/{subfolder}")
+
+    return db_path
+
+
+@task
+def get_atoms_from_db(
+    db_path: Path | str,
+    repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
+    subfolder: str = Path(__file__).parent.name,
+) -> Generator[Atoms, None, None]:
+    """Retrieve ASE Atoms objects from an ASE database."""
+    db_path = Path(db_path)
+    if not db_path.exists():
+        db_path = hf_hub_download(
+            repo_id=repo_id,
+            repo_type=repo_type,
+            subfolder=subfolder,
+            filename=str(db_path),
+        )
+    with connect(db_path) as db:
+        for row in db.select():
+            yield row.toatoms()
+
+
 def body_order(n=32, b=5):
     """
     Generate all possible combinations of atomic counts for `b` species
@@ -69,17 +127,16 @@ def get_endmember(structure, conc_lst, elements):
 def generate_alloy_db(
     structure_template: Atoms,
     elements: list[str],
-    ...  # removed parameter not rendered in the source view
+    db_path: Path | str,
     upload: bool = True,
+    hf_token: str | None = os.getenv("HF_TOKEN", None),
     repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
 ) -> Path:
-
-    load_dotenv()
-    hf_token = os.getenv("HF_TOKEN", None)
-
+
     if upload and hf_token is None:
-        raise ValueError("HF_TOKEN ...")  # original message truncated in the source view
+        raise ValueError("HF_TOKEN is required to upload the database.")
+
     num_atoms = len(structure_template)
     num_species = len(elements)

@@ -88,45 +145,35 @@ def generate_alloy_db(

     # Prepare the database
     db_path = (
-        ...  # original expression not rendered in the source view
+        Path(db_path) or Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
     )
     db_path.unlink(missing_ok=True)

-    ...  # removed body (old lines 95-122) not rendered in the source view
-    )
-    print(f"Database uploaded: {db_path}")
-
-    return db_path
-
-
-if __name__ == "__main__":
-    structure_template = bulk("Al", a=3.6, cubic=True).repeat([2, 2, 2])
-    elements = ["Fe", "Ni", "Cr"]
-    generate_alloy_db(structure_template, elements, upload=True)
+    atoms_list = []
+    for i, composition in tqdm(
+        enumerate(configurations), total=len(configurations)
+    ):
+        # Skip trivial cases where only one element is present
+        if sum(composition == 0) != len(elements) - 1:
+            atoms = generate_sqs(
+                structure_template=structure_template,
+                elements=np.array(elements)[composition != 0],
+                counts=composition[composition != 0],
+            )
+        else:
+            atoms = get_endmember(
+                structure=structure_template.copy(),
+                conc_lst=composition,
+                elements=elements,
+            )
+        atoms_list.append(atoms)
+
+
+    return save_to_db(
+        atoms_list=atoms_list,
+        db_path=db_path,
+        upload=upload,
+        hf_token=hf_token,
+        repo_id=repo_id,
+        repo_type=repo_type,
+    )
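Taken together, `input.py` now separates generation from consumption: `generate_alloy_db` builds the SQS structures and hands them to `save_to_db`, which writes the ASE database and optionally pushes it to the Hub, while `get_atoms_from_db` streams structures back, downloading from the Hub when the file is missing locally. A usage sketch assembled from the signatures above and the `__main__` block this commit removed; the import path is inferred from the file location, and `upload=False` is an illustrative choice.

from ase.build import bulk

from mlip_arena.tasks.eos_alloy.input import generate_alloy_db, get_atoms_from_db

# Template and elements taken from the removed __main__ block.
structure_template = bulk("Al", a=3.6, cubic=True).repeat([2, 2, 2])
elements = ["Fe", "Ni", "Cr"]

# db_path is now an explicit argument; set upload=True (with HF_TOKEN
# exported) to push the database to the Hub after writing.
db_path = generate_alloy_db(
    structure_template,
    elements,
    db_path="sqs_Fe-Ni-Cr.db",
    upload=False,
)

# get_atoms_from_db is a Prefect task; use .fn to call the plain generator
# outside a flow context.
for atoms in get_atoms_from_db.fn(db_path):
    print(atoms.get_chemical_formula())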