Yuan (Cyrus) Chiang committed
Commit aadf5d0
Parent: b7a7786

Clean up `eos_alloy` (#36)


* refactor input and flow

* move notebook from src to examples

* change gitignore

.gitignore CHANGED
@@ -2,7 +2,8 @@
 *.ipynb
 *.extxyz
 *.traj
-mlip_arena/tasks/*/*
+mlip_arena/tasks/*/
+examples/
 lab/
 manuscripts/
 
mlip_arena/tasks/eos_alloy/run.ipynb → examples/eos_alloy/run_Fe-Ni-Cr.ipynb RENAMED
The diff for this file is too large to render. See raw diff
 
mlip_arena/tasks/eos_alloy/flow.py CHANGED
@@ -1,16 +1,12 @@
 from functools import partial
 from pathlib import Path
-import json
 
 import pandas as pd
-from dask.distributed import Client
-from dask_jobqueue import SLURMCluster
 from huggingface_hub import hf_hub_download
 from prefect import Task, flow, task
 from prefect.client.schemas.objects import TaskRun
 from prefect.futures import wait
-from prefect.states import State, Failed
-from prefect_dask import DaskTaskRunner
+from prefect.states import State
 
 from ase.db import connect
 from mlip_arena.data.local import SafeHDFStore
@@ -47,7 +43,7 @@ def save_to_hdf(
 
     if not isinstance(result, dict):
         return
-
+
     try:
         atoms = result["atoms"]
         calculator_name = (
@@ -78,8 +74,7 @@ def save_to_hdf(
     family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
     family_path.mkdir(parents=True, exist_ok=True)
 
-    with open(family_path / f"{calculator_name}_{formula}.json", "w") as f:
-        json.dump(result, f, indent=2)
+    df.to_json(family_path / f"{calculator_name}_{formula}.json", indent=2)
 
     with SafeHDFStore(fpath, mode="a") as store:
         store.append(
@@ -131,6 +126,7 @@ def run_from_db(
             criterion=criterion,
             max_abs_strain=max_abs_strain,
             concurrent=concurrent,
+            cache_opt=False,
         )
         futures.append(future)
 
@@ -141,45 +137,3 @@ def run_from_db(
         for f in futures
         if f.state.is_completed()
     ]
-
-
-if __name__ == "__main__":
-    nodes_per_alloc = 1
-    gpus_per_alloc = 4
-    ntasks = 1
-
-    cluster_kwargs = dict(
-        cores=1,
-        memory="64 GB",
-        shebang="#!/bin/bash",
-        account="m3828",
-        walltime="00:30:00",
-        job_mem="0",
-        job_script_prologue=[
-            "source ~/.bashrc",
-            "module load python",
-            "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
-        ],
-        job_directives_skip=["-n", "--cpus-per-task", "-J"],
-        job_extra_directives=[
-            "-J eos",
-            "-q debug",
-            f"-N {nodes_per_alloc}",
-            "-C gpu",
-            f"-G {gpus_per_alloc}",
-        ],
-    )
-
-    cluster = SLURMCluster(**cluster_kwargs)
-    print(cluster.job_script())
-    cluster.adapt(minimum_jobs=2, maximum_jobs=2)
-    client = Client(cluster)
-
-    run_from_db_ = run_from_db.with_options(
-        task_runner=DaskTaskRunner(address=client.scheduler.address),
-        log_prints=True,
-    )
-
-    results = run_from_db_(
-        db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr"
-    )
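
Note: the SLURM/Dask launcher deleted above no longer ships inside flow.py; per the commit message, the example notebook now lives under examples/. For reference, a minimal external launcher assembled from the removed __main__ block might look like the sketch below — account, environment, and queue settings are site-specific placeholders, not library defaults:

# Reference sketch only, mirroring the __main__ block removed from flow.py.
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
from prefect_dask import DaskTaskRunner

from mlip_arena.tasks.eos_alloy.flow import run_from_db

cluster = SLURMCluster(
    cores=1,
    memory="64 GB",
    walltime="00:30:00",
    job_extra_directives=["-q debug", "-N 1", "-C gpu", "-G 4"],  # placeholder queue/GPU flags
)
cluster.adapt(minimum_jobs=2, maximum_jobs=2)  # keep two SLURM allocations alive
client = Client(cluster)

# Attach the Prefect flow to the Dask scheduler and sweep the SQS database.
results = run_from_db.with_options(
    task_runner=DaskTaskRunner(address=client.scheduler.address),
    log_prints=True,
)(db_path="sqs_Fe-Ni-Cr.db", out_path="eos.h5", table_name="Fe-Ni-Cr")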
mlip_arena/tasks/eos_alloy/input.py CHANGED
@@ -22,17 +22,75 @@ Authors
 
 import os
 from pathlib import Path
+from typing import Generator, Iterable
 
 import numpy as np
-from dotenv import load_dotenv
-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, hf_hub_download
+from prefect import task
 from tqdm.auto import tqdm
 
 from ase import Atoms
-from ase.build import bulk
 from ase.db import connect
 
 
+def save_to_db(
+    atoms_list: list[Atoms] | Iterable[Atoms] | Atoms,
+    db_path: Path | str,
+    upload: bool = True,
+    hf_token: str | None = os.getenv("HF_TOKEN", None),
+    repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
+    subfolder: str = Path(__file__).parent.name,
+):
+    """Save ASE Atoms objects to an ASE database and optionally upload to Hugging Face Hub."""
+
+    if upload and hf_token is None:
+        raise ValueError("HF_TOKEN is required to upload the database.")
+
+    db_path = Path(db_path)
+
+    if isinstance(atoms_list, Atoms):
+        atoms_list = [atoms_list]
+
+    with connect(db_path) as db:
+        for atoms in atoms_list:
+            if not isinstance(atoms, Atoms):
+                raise ValueError("atoms_list must contain ASE Atoms objects.")
+            db.write(atoms)
+
+    if upload:
+        api = HfApi(token=hf_token)
+        api.upload_file(
+            path_or_fileobj=db_path,
+            path_in_repo=f"{subfolder}/{db_path.name}",
+            repo_id=repo_id,
+            repo_type=repo_type,
+        )
+        print(f"{db_path.name} uploaded to {repo_id}/{subfolder}")
+
+    return db_path
+
+@task
+def get_atoms_from_db(
+    db_path: Path | str,
+    repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
+    subfolder: str = Path(__file__).parent.name,
+) -> Generator[Atoms, None, None]:
+    """Retrieve ASE Atoms objects from an ASE database."""
+    db_path = Path(db_path)
+    if not db_path.exists():
+        db_path = hf_hub_download(
+            repo_id=repo_id,
+            repo_type=repo_type,
+            subfolder=subfolder,
+            filename=str(db_path),
+        )
+    with connect(db_path) as db:
+        for row in db.select():
+            yield row.toatoms()
+
+
 def body_order(n=32, b=5):
     """
     Generate all possible combinations of atomic counts for `b` species
@@ -69,17 +127,16 @@ def get_endmember(structure, conc_lst, elements):
 def generate_alloy_db(
     structure_template: Atoms,
     elements: list[str],
-    local_path: Path | None = None,
+    db_path: Path | str,
     upload: bool = True,
+    hf_token: str | None = os.getenv("HF_TOKEN", None),
     repo_id: str = "atomind/mlip-arena",
+    repo_type: str = "dataset",
 ) -> Path:
-    # Load Hugging Face API token
-    load_dotenv()
-    hf_token = os.getenv("HF_TOKEN", None)
-
+
     if upload and hf_token is None:
-        raise ValueError("HF_TOKEN environment variable not set.")
-
+        raise ValueError("HF_TOKEN is required to upload the database.")
+
     num_atoms = len(structure_template)
     num_species = len(elements)
 
@@ -88,45 +145,35 @@ def generate_alloy_db(
 
     # Prepare the database
     db_path = (
-        local_path or Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
+        Path(db_path) or Path(__file__).resolve().parent / f"sqs_{'-'.join(elements)}.db"
    )
     db_path.unlink(missing_ok=True)
 
-    # Generate and save structures
-    with connect(db_path) as db:
-        for i, composition in tqdm(
-            enumerate(configurations), total=len(configurations)
-        ):
-            # Skip trivial cases where only one element is present
-            if sum(composition == 0) != len(elements) - 1:
-                atoms = generate_sqs(
-                    structure_template=structure_template,
-                    elements=np.array(elements)[composition != 0],
-                    counts=composition[composition != 0],
-                )
-            else:
-                atoms = get_endmember(
-                    structure=structure_template.copy(),
-                    conc_lst=composition,
-                    elements=elements,
-                )
-            db.write(atoms)
-
-    # Upload the database to Hugging Face Hub
-    if upload:
-        api = HfApi(token=hf_token)
-        api.upload_file(
-            path_or_fileobj=db_path,
-            path_in_repo=f"{Path(__file__).parent.name}/{db_path.name}",
-            repo_id=repo_id,
-            repo_type="dataset",
-        )
-        print(f"Database uploaded: {db_path}")
-
-    return db_path
-
-
-if __name__ == "__main__":
-    structure_template = bulk("Al", a=3.6, cubic=True).repeat([2, 2, 2])
-    elements = ["Fe", "Ni", "Cr"]
-    generate_alloy_db(structure_template, elements, upload=True)
+    atoms_list = []
+    for i, composition in tqdm(
+        enumerate(configurations), total=len(configurations)
+    ):
+        # Skip trivial cases where only one element is present
+        if sum(composition == 0) != len(elements) - 1:
+            atoms = generate_sqs(
+                structure_template=structure_template,
+                elements=np.array(elements)[composition != 0],
+                counts=composition[composition != 0],
+            )
+        else:
+            atoms = get_endmember(
+                structure=structure_template.copy(),
+                conc_lst=composition,
+                elements=elements,
+            )
+        atoms_list.append(atoms)
+
+
+    return save_to_db(
+        atoms_list=atoms_list,
+        db_path=db_path,
+        upload=upload,
+        hf_token=hf_token,
+        repo_id=repo_id,
+        repo_type=repo_type,
+    )
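
Note: with this refactor, database generation and retrieval become reusable helpers rather than a module-level script. A short usage sketch based on the signatures above — the bulk("Al", ...) template and Fe/Ni/Cr elements mirror the __main__ example removed from input.py, and the filename matches the one the flow consumes; treat it as illustrative rather than canonical:

from ase.build import bulk

from mlip_arena.tasks.eos_alloy.input import generate_alloy_db, get_atoms_from_db

# Build SQS structures for all Fe-Ni-Cr compositions and write them to an
# ASE database, optionally uploading it to the Hub (HF_TOKEN must be set in
# the environment when upload=True).
structure_template = bulk("Al", a=3.6, cubic=True).repeat([2, 2, 2])
db_path = generate_alloy_db(
    structure_template=structure_template,
    elements=["Fe", "Ni", "Cr"],
    db_path="sqs_Fe-Ni-Cr.db",
    upload=True,
)

# get_atoms_from_db is a Prefect @task: call it inside a flow, or use .fn()
# to invoke the underlying generator directly. It falls back to
# hf_hub_download when the file is absent locally.
for atoms in get_atoms_from_db.fn("sqs_Fe-Ni-Cr.db"):
    print(atoms.get_chemical_formula())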