Yuan (Cyrus) Chiang committed on
Commit
3397e50
·
unverified ·
1 Parent(s): 75ac94f

Add eSEN EOS (#59, #56)

Browse files

Add eSEN EOS and EV scan results following #56

Files changed (46) hide show
  1. .gitattributes +7 -0
  2. .github/workflows/sync-hf.yaml +1 -1
  3. .github/workflows/test.yaml +39 -11
  4. examples/eos_bulk/CHGNet_processed.parquet +2 -2
  5. examples/eos_bulk/M3GNet_processed.parquet +2 -2
  6. examples/eos_bulk/MACE-MP(M)_processed.parquet +2 -2
  7. examples/eos_bulk/MACE-MPA_processed.parquet +2 -2
  8. examples/eos_bulk/MatterSim_processed.parquet +2 -2
  9. examples/eos_bulk/ORBv2_processed.parquet +2 -2
  10. examples/eos_bulk/SevenNet_processed.parquet +2 -2
  11. examples/eos_bulk/eSEN.parquet +3 -0
  12. examples/eos_bulk/eSEN_processed.parquet +3 -0
  13. examples/eos_bulk/run.py +107 -72
  14. examples/eos_bulk/summary.csv +8 -7
  15. examples/eos_bulk/summary.tex +8 -7
  16. examples/mof/CHGNet.pkl +0 -0
  17. examples/mof/M3GNet.pkl +0 -0
  18. examples/mof/MACE-MP(M).pkl +0 -0
  19. examples/mof/MACE-MPA.pkl +0 -0
  20. examples/mof/MatterSim.pkl +0 -0
  21. examples/mof/ORBv2.pkl +0 -0
  22. examples/mof/classification/M3GNet.pkl +3 -0
  23. examples/mof/classification/MACE-MP(M).pkl +3 -0
  24. examples/mof/classification/MACE-MPA.pkl +3 -0
  25. examples/mof/classification/MatterSim.pkl +3 -0
  26. examples/mof/classification/ORBv2.pkl +3 -0
  27. examples/mof/classification/SevenNet.pkl +3 -0
  28. examples/mof/classification/analysis.ipynb +380 -0
  29. examples/mof/classification/input.pkl +3 -0
  30. examples/mof/classification/mof-misclassification_margin.pdf +0 -0
  31. examples/wbm_ev/ALIGNN_processed.parquet +2 -2
  32. examples/wbm_ev/CHGNet_processed.parquet +2 -2
  33. examples/wbm_ev/M3GNet_processed.parquet +2 -2
  34. examples/wbm_ev/MACE-MP(M)_processed.parquet +2 -2
  35. examples/wbm_ev/MACE-MPA_processed.parquet +2 -2
  36. examples/wbm_ev/MatterSim_processed.parquet +2 -2
  37. examples/wbm_ev/ORBv2_processed.parquet +2 -2
  38. examples/wbm_ev/SevenNet_processed.parquet +2 -2
  39. examples/wbm_ev/eSEN.parquet +3 -0
  40. examples/wbm_ev/eSEN_processed.parquet +3 -0
  41. examples/wbm_ev/eqV2(OMat)_processed.parquet +2 -2
  42. examples/wbm_ev/run.py +38 -38
  43. examples/wbm_ev/summary.csv +10 -9
  44. examples/wbm_ev/summary.tex +10 -9
  45. mlip_arena/models/registry.yaml +4 -2
  46. pyproject.toml +3 -3
.gitattributes CHANGED
@@ -1,3 +1,10 @@
1
  *.json filter=lfs diff=lfs merge=lfs -text
2
  *.parquet filter=lfs diff=lfs merge=lfs -text
3
  *.db filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
1
  *.json filter=lfs diff=lfs merge=lfs -text
2
  *.parquet filter=lfs diff=lfs merge=lfs -text
3
  *.db filter=lfs diff=lfs merge=lfs -text
4
+ examples/mof/classification/SevenNet.pkl filter=lfs diff=lfs merge=lfs -text
5
+ examples/mof/classification/input.pkl filter=lfs diff=lfs merge=lfs -text
6
+ examples/mof/classification/M3GNet.pkl filter=lfs diff=lfs merge=lfs -text
7
+ examples/mof/classification/MACE-MPA.pkl filter=lfs diff=lfs merge=lfs -text
8
+ examples/mof/classification/MACE-MP(M).pkl filter=lfs diff=lfs merge=lfs -text
9
+ examples/mof/classification/MatterSim.pkl filter=lfs diff=lfs merge=lfs -text
10
+ examples/mof/classification/ORBv2.pkl filter=lfs diff=lfs merge=lfs -text
.github/workflows/sync-hf.yaml CHANGED
@@ -12,7 +12,7 @@ jobs:
12
  if: ${{ github.event.workflow_run.conclusion == 'success' }}
13
  runs-on: ubuntu-latest
14
  steps:
15
- - uses: actions/checkout@v3
16
  with:
17
  fetch-depth: 0
18
  lfs: true
 
12
  if: ${{ github.event.workflow_run.conclusion == 'success' }}
13
  runs-on: ubuntu-latest
14
  steps:
15
+ - uses: actions/checkout@v4
16
  with:
17
  fetch-depth: 0
18
  lfs: true
.github/workflows/test.yaml CHANGED
@@ -14,14 +14,15 @@ jobs:
14
  runs-on: ubuntu-latest
15
 
16
  strategy:
17
- # max-parallel: 2
18
  matrix:
19
  python-version: ["3.10", "3.11", "3.12"]
20
-
21
 
22
  steps:
23
- - name: Checkout repository
24
  uses: actions/checkout@v4
 
 
 
25
 
26
  - name: Install uv
27
  uses: astral-sh/setup-uv@v6
@@ -36,22 +37,49 @@ jobs:
36
  python-version: ${{ matrix.python-version }}
37
 
38
  - name: Install dependencies
39
- run: |
40
- bash scripts/install-linux.sh
41
 
42
  - name: List dependencies
43
  run: pip list
44
-
45
- - name: Login huggingface
46
- # if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
47
  env:
48
  HF_TOKEN: ${{ secrets.HF_TOKEN_READ_ONLY }}
49
- run:
50
- huggingface-cli login --token $HF_TOKEN
51
 
52
  - name: Run tests
53
  env:
54
  PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }}
55
  PREFECT_API_URL: ${{ secrets.PREFECT_API_URL }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  run: |
57
- pytest -vra -n 5 --dist=loadscope tests
 
14
  runs-on: ubuntu-latest
15
 
16
  strategy:
 
17
  matrix:
18
  python-version: ["3.10", "3.11", "3.12"]
 
19
 
20
  steps:
21
+ - name: Checkout PR with full history
22
  uses: actions/checkout@v4
23
+ with:
24
+ lfs: true
25
+ fetch-depth: 0
26
 
27
  - name: Install uv
28
  uses: astral-sh/setup-uv@v6
 
37
  python-version: ${{ matrix.python-version }}
38
 
39
  - name: Install dependencies
40
+ run: bash scripts/install-linux.sh
 
41
 
42
  - name: List dependencies
43
  run: pip list
44
+
45
+ - name: Login to Hugging Face
 
46
  env:
47
  HF_TOKEN: ${{ secrets.HF_TOKEN_READ_ONLY }}
48
+ run: huggingface-cli login --token $HF_TOKEN
 
49
 
50
  - name: Run tests
51
  env:
52
  PREFECT_API_KEY: ${{ secrets.PREFECT_API_KEY }}
53
  PREFECT_API_URL: ${{ secrets.PREFECT_API_URL }}
54
+ run: pytest -vra -n 5 --dist=loadscope tests
55
+
56
+ - name: Squash commits and trial push to Hugging Face
57
+ if: github.event_name == 'pull_request'
58
+ id: trial_push
59
+ env:
60
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
61
+ TRIAL_BRANCH: trial-sync-${{ github.sha }}
62
+ run: |
63
+ # Configure Git user identity
64
+ git config user.name "github-actions[ci]"
65
+ git config user.email "github-actions[ci]@users.noreply.github.com"
66
+
67
+ # Rebase and squash all PR commits into one
68
+ BASE=$(git merge-base origin/main HEAD)
69
+ git reset --soft $BASE
70
+ git commit -m "Squashed commit from PR #${{ github.event.pull_request.number }}"
71
+
72
+ # Setup LFS
73
+ git lfs fetch
74
+ git lfs checkout
75
+
76
+ # Push to temporary branch on Hugging Face
77
+ git push -f https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/atomind/mlip-arena HEAD:refs/heads/$TRIAL_BRANCH
78
+
79
+ - name: Delete trial branch from Hugging Face
80
+ if: steps.trial_push.outcome == 'success'
81
+ env:
82
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
83
+ TRIAL_BRANCH: trial-sync-${{ github.sha }}
84
  run: |
85
+ git push https://HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/atomind/mlip-arena --delete $TRIAL_BRANCH || true
examples/eos_bulk/CHGNet_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6fbea63f9035e376bb5ac7db38175102ab3f96a0f8758cc3e9931424f829ac0
3
- size 357919
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0416eeed1748994b67e8f6e9768a5f1d2a77c19f9512bc408f9b39ca3c19e3d4
3
+ size 358042
examples/eos_bulk/M3GNet_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18ea51bf19c5e011e170a3229bbd63ce675a725c364e0cffb71de95459f8629e
3
- size 379859
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a34f8148f771f0b751f01ccc9d260fd5ae48b625b979ec2112e008c82c59a08
3
+ size 379982
examples/eos_bulk/MACE-MP(M)_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f492125c87400fee013d32904ee97df49a07d321bd393f9335c7bf4258fe159
3
- size 371004
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73e2b8ad6d5e114c1c0fea4697b17810182d0b273185512cb40fa894ea30b4c3
3
+ size 371128
examples/eos_bulk/MACE-MPA_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34bd9ae08656e374263820774f49e91a964aa8b2aeade4150cf62cfc08bb37f6
3
- size 365289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:819bc0c721e99df8dda0a4c6df565deb96736ecc5ceefefe300e5b72b7d6312f
3
+ size 365412
examples/eos_bulk/MatterSim_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cca22b5db67dae59602adfb8c42a80da90b39c4e95af89ef918813351b422119
3
- size 320962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c707ffb285f03a5c7d1486a6998c787088f07a97b206585b17839fff4fab49b4
3
+ size 321086
examples/eos_bulk/ORBv2_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c90e61645c83f2452bcd90ef9132c46266896217fe1dea9cca8e0d124d73821a
3
- size 227929
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f794da83d1031823577b085c480f7d285520c086bdd0e7e6e7acb7a5a2457329
3
+ size 228052
examples/eos_bulk/SevenNet_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cc03d9af93c001f3fa441b50f058506e13c0aa7cb3d329275e68f5ed80dc3e6
3
- size 364846
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9aab95402aa62169ba6f1e12a7774362b3e5cc027f5c556de734783e6d6f29b
3
+ size 364969
examples/eos_bulk/eSEN.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4503e17151b7376bbd88dc8c4767747e7290e8eae898e050b0a231a5c447e3e6
3
+ size 427652
examples/eos_bulk/eSEN_processed.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d12b36a2bd465e16ada4363e31756d5de5d41dd890d0e88e8ca86b76dd66336
3
+ size 313235
examples/eos_bulk/run.py CHANGED
@@ -1,57 +1,70 @@
1
- import functools
2
  from pathlib import Path
3
 
4
  import pandas as pd
 
5
  from ase.db import connect
6
  from dask.distributed import Client
7
  from dask_jobqueue import SLURMCluster
8
- from prefect import Task, flow, task
9
- from prefect.client.schemas.objects import TaskRun
10
- from prefect.states import State
11
  from prefect_dask import DaskTaskRunner
12
 
13
  from mlip_arena.models import REGISTRY, MLIPEnum
14
- from mlip_arena.tasks.eos import run as EOS
15
- from mlip_arena.tasks.optimize import run as OPT
16
  from mlip_arena.tasks.utils import get_calculator
17
 
18
 
19
  @task
20
  def load_wbm_structures():
21
  """
22
- Load the WBM structures from a ASE DB file.
 
 
 
 
 
 
 
23
  """
24
  with connect("../wbm_structures.db") as db:
25
  for row in db.select():
26
  yield row.toatoms(add_additional_information=True)
27
 
28
 
29
- def save_result(
30
- tsk: Task,
31
- run: TaskRun,
32
- state: State,
33
- model_name: str,
34
- id: str,
35
- ):
36
- result = run.state.result()
37
 
38
- assert isinstance(result, dict)
39
 
40
- result["method"] = model_name
41
- result["id"] = id
42
- result.pop("atoms", None)
43
 
44
- fpath = Path(f"{model_name}")
45
- fpath.mkdir(exist_ok=True)
46
 
47
- fpath = fpath / f"{result['id']}.pkl"
48
 
49
- df = pd.DataFrame([result])
50
- df.to_pickle(fpath)
51
 
52
 
53
- @task
54
- def eos_bulk(atoms, model):
 
 
 
 
 
 
 
55
 
56
  calculator = get_calculator(
57
  model
@@ -67,14 +80,13 @@ def eos_bulk(atoms, model):
67
  fmax=0.1,
68
  ),
69
  )
70
-
71
- return EOS.with_options(
72
  refresh_cache=True,
73
- on_completion=[functools.partial(
74
- save_result,
75
- model_name=model.name,
76
- id=atoms.info["key_value_pairs"]["wbm_id"],
77
- )],
78
  )(
79
  atoms=result["atoms"],
80
  calculator=calculator,
@@ -84,52 +96,75 @@ def eos_bulk(atoms, model):
84
  concurrent=False
85
  )
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  @flow
89
- def run_all():
90
  futures = []
91
  for atoms in load_wbm_structures():
92
- for model in MLIPEnum:
93
- if "eos_bulk" not in REGISTRY[model.name].get("gpu-tasks", []):
94
- continue
95
- result = eos_bulk.submit(atoms, model)
 
 
 
 
96
  futures.append(result)
 
 
 
97
  return [f.result(raise_on_failure=False) for f in futures]
98
 
99
 
100
- nodes_per_alloc = 1
101
- gpus_per_alloc = 1
102
- ntasks = 1
103
-
104
- cluster_kwargs = dict(
105
- cores=4,
106
- memory="64 GB",
107
- shebang="#!/bin/bash",
108
- account="m3828",
109
- walltime="00:50:00",
110
- job_mem="0",
111
- job_script_prologue=[
112
- "source ~/.bashrc",
113
- "module load python",
114
- "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
115
- ],
116
- job_directives_skip=["-n", "--cpus-per-task", "-J"],
117
- job_extra_directives=[
118
- "-J eos_bulk",
119
- "-q regular",
120
- f"-N {nodes_per_alloc}",
121
- "-C gpu",
122
- f"-G {gpus_per_alloc}",
123
- "--exclusive",
124
- ],
125
- )
 
 
126
 
127
- cluster = SLURMCluster(**cluster_kwargs)
128
- print(cluster.job_script())
129
- cluster.adapt(minimum_jobs=20, maximum_jobs=40)
130
- client = Client(cluster)
131
 
132
- run_all.with_options(
133
- task_runner=DaskTaskRunner(address=client.scheduler.address),
134
- log_prints=True,
135
- )()
 
1
+ # import functools
2
  from pathlib import Path
3
 
4
  import pandas as pd
5
+ from ase import Atoms
6
  from ase.db import connect
7
  from dask.distributed import Client
8
  from dask_jobqueue import SLURMCluster
9
+ from prefect import flow, task
10
+ from prefect.cache_policies import INPUTS, TASK_SOURCE
11
+ from prefect.runtime import task_run
12
  from prefect_dask import DaskTaskRunner
13
 
14
  from mlip_arena.models import REGISTRY, MLIPEnum
 
 
15
  from mlip_arena.tasks.utils import get_calculator
16
 
17
 
18
  @task
19
  def load_wbm_structures():
20
  """
21
+ Load the WBM structures from an ASE database file.
22
+
23
+ Reads structures from 'wbm_structures.db' and yields them as ASE Atoms objects
24
+ with additional metadata preserved from the database.
25
+
26
+ Yields:
27
+ ase.Atoms: Individual atomic structures from the WBM database with preserved
28
+ metadata in the .info dictionary.
29
  """
30
  with connect("../wbm_structures.db") as db:
31
  for row in db.select():
32
  yield row.toatoms(add_additional_information=True)
33
 
34
 
35
+ # def save_result(
36
+ # tsk: Task,
37
+ # run: TaskRun,
38
+ # state: State,
39
+ # model_name: str,
40
+ # id: str,
41
+ # ):
42
+ # result = run.state.result()
43
 
44
+ # assert isinstance(result, dict)
45
 
46
+ # result["method"] = model_name
47
+ # result["id"] = id
48
+ # result.pop("atoms", None)
49
 
50
+ # fpath = Path(f"{model_name}")
51
+ # fpath.mkdir(exist_ok=True)
52
 
53
+ # fpath = fpath / f"{result['id']}.pkl"
54
 
55
+ # df = pd.DataFrame([result])
56
+ # df.to_pickle(fpath)
57
 
58
 
59
+ @task(
60
+ name="EOS bulk - WBM",
61
+ task_run_name=lambda: f"{task_run.task_name}: {task_run.parameters['atoms'].get_chemical_formula()} - {task_run.parameters['model'].name}",
62
+ cache_policy=TASK_SOURCE + INPUTS,
63
+ )
64
+ def eos_bulk(atoms: Atoms, model: MLIPEnum):
65
+
66
+ from mlip_arena.tasks.eos import run as EOS
67
+ from mlip_arena.tasks.optimize import run as OPT
68
 
69
  calculator = get_calculator(
70
  model
 
80
  fmax=0.1,
81
  ),
82
  )
83
+ result = EOS.with_options(
 
84
  refresh_cache=True,
85
+ # on_completion=[functools.partial(
86
+ # save_result,
87
+ # model_name=model.name,
88
+ # id=atoms.info["key_value_pairs"]["wbm_id"],
89
+ # )],
90
  )(
91
  atoms=result["atoms"],
92
  calculator=calculator,
 
96
  concurrent=False
97
  )
98
 
99
+ result["method"] = model.name
100
+ result["id"] = atoms.info["key_value_pairs"]["wbm_id"]
101
+ result.pop("atoms", None)
102
+
103
+ fpath = Path(f"{model.name}")
104
+ fpath.mkdir(exist_ok=True)
105
+
106
+ fpath = fpath / f"{result['id']}.pkl"
107
+
108
+ df = pd.DataFrame([result])
109
+ df.to_pickle(fpath)
110
+
111
+ return df
112
+
113
 
114
  @flow
115
+ def submit_tasks():
116
  futures = []
117
  for atoms in load_wbm_structures():
118
+ model = MLIPEnum["eSEN"]
119
+ # for model in MLIPEnum:
120
+ if "eos_bulk" not in REGISTRY[model.name].get("gpu-tasks", []):
121
+ continue
122
+ try:
123
+ result = eos_bulk.with_options(
124
+ refresh_cache=True
125
+ ).submit(atoms, model)
126
  futures.append(result)
127
+ except Exception:
128
+ # print(f"Failed to submit task for {model.name}: {e}")
129
+ continue
130
  return [f.result(raise_on_failure=False) for f in futures]
131
 
132
 
133
+ if __name__ == "__main__":
134
+ nodes_per_alloc = 1
135
+ gpus_per_alloc = 1
136
+ ntasks = 1
137
+
138
+ cluster_kwargs = dict(
139
+ cores=1,
140
+ memory="64 GB",
141
+ shebang="#!/bin/bash",
142
+ account="m3828",
143
+ walltime="00:30:00",
144
+ job_mem="0",
145
+ job_script_prologue=[
146
+ "source ~/.bashrc",
147
+ "module load python",
148
+ "module load cudatoolkit/12.4",
149
+ "source activate /pscratch/sd/c/cyrusyc/.conda/dev",
150
+ ],
151
+ job_directives_skip=["-n", "--cpus-per-task", "-J"],
152
+ job_extra_directives=[
153
+ "-J eos_bulk",
154
+ "-q regular",
155
+ f"-N {nodes_per_alloc}",
156
+ "-C gpu",
157
+ f"-G {gpus_per_alloc}",
158
+ # "--exclusive",
159
+ ],
160
+ )
161
 
162
+ cluster = SLURMCluster(**cluster_kwargs)
163
+ print(cluster.job_script())
164
+ cluster.adapt(minimum_jobs=50, maximum_jobs=50)
165
+ client = Client(cluster)
166
 
167
+ submit_tasks.with_options(
168
+ task_runner=DaskTaskRunner(address=client.scheduler.address),
169
+ log_prints=True,
170
+ )()
examples/eos_bulk/summary.csv CHANGED
@@ -1,8 +1,9 @@
1
  model,rank,rank-aggregation,energy-diff-flip-times,tortuosity,spearman-compression-energy,spearman-compression-derivative,spearman-tension-energy,missing
2
- MACE-MPA,1,6,1.0370741482965933,1.005455197941088,-0.9993684338373716,0.9963320580555048,0.993186372745491,2
3
- MACE-MP(M),2,16,1.042211055276382,1.008986842539345,-0.999329983249581,0.9941160347190496,0.9915857612939804,5
4
- MatterSim,3,18,1.045135406218656,1.0060900449752808,-0.99734962463147,0.9927904926901917,0.9880977115916667,3
5
- CHGNet,4,22,1.1053159478435306,1.014753469076796,-0.9964985866690981,0.9929971733381963,0.9866417434120545,3
6
- SevenNet,5,27,1.1093279839518555,1.0186969977862483,-0.9981277164827815,0.9889121911188109,0.9859580417030127,3
7
- M3GNet,6,33,1.1748743718592964,1.0175007963267957,-0.9963209989340641,0.9897426526572255,0.9801690217498693,5
8
- ORBv2,7,42,1.3162134944612287,1.0374718753890275,-0.9918459519667977,0.9701425127407,0.9637462235649547,7
 
 
1
  model,rank,rank-aggregation,energy-diff-flip-times,tortuosity,spearman-compression-energy,spearman-compression-derivative,spearman-tension-energy,missing
2
+ MACE-MPA,1,7,1.0370741482965933,1.005455197941088,-0.9993684338373716,0.9963320580555048,0.993186372745491,2
3
+ eSEN,2,15,1.042211055276382,1.0082267858369258,-0.9993299832495811,0.9968570123343992,0.9920968478757424,5
4
+ MACE-MP(M),3,20,1.042211055276382,1.008986842539345,-0.999329983249581,0.9941160347190496,0.9915857612939804,5
5
+ MatterSim,4,22,1.045135406218656,1.0060900449752808,-0.99734962463147,0.9927904926901917,0.9880977115916667,3
6
+ CHGNet,5,27,1.1053159478435306,1.014753469076796,-0.9964985866690981,0.9929971733381963,0.9866417434120545,3
7
+ SevenNet,6,32,1.1093279839518555,1.0186969977862483,-0.9981277164827815,0.9889121911188109,0.9859580417030127,3
8
+ M3GNet,7,38,1.1748743718592964,1.0175007963267957,-0.9963209989340641,0.9897426526572255,0.9801690217498693,5
9
+ ORBv2,8,48,1.3162134944612287,1.0374718753890275,-0.9918459519667977,0.9701425127407,0.9637462235649547,7
examples/eos_bulk/summary.tex CHANGED
@@ -2,12 +2,13 @@
2
  \toprule
3
  model & rank & rank-aggregation & energy-diff-flip-times & tortuosity & spearman-compression-energy & spearman-compression-derivative & spearman-tension-energy & missing \\
4
  \midrule
5
- MACE-MPA & 1 & 6 & 1.037074 & 1.005455 & -0.999368 & 0.996332 & 0.993186 & 2 \\
6
- MACE-MP(M) & 2 & 16 & 1.042211 & 1.008987 & -0.999330 & 0.994116 & 0.991586 & 5 \\
7
- MatterSim & 3 & 18 & 1.045135 & 1.006090 & -0.997350 & 0.992790 & 0.988098 & 3 \\
8
- CHGNet & 4 & 22 & 1.105316 & 1.014753 & -0.996499 & 0.992997 & 0.986642 & 3 \\
9
- SevenNet & 5 & 27 & 1.109328 & 1.018697 & -0.998128 & 0.988912 & 0.985958 & 3 \\
10
- M3GNet & 6 & 33 & 1.174874 & 1.017501 & -0.996321 & 0.989743 & 0.980169 & 5 \\
11
- ORBv2 & 7 & 42 & 1.316213 & 1.037472 & -0.991846 & 0.970143 & 0.963746 & 7 \\
 
12
  \bottomrule
13
  \end{tabular}
 
2
  \toprule
3
  model & rank & rank-aggregation & energy-diff-flip-times & tortuosity & spearman-compression-energy & spearman-compression-derivative & spearman-tension-energy & missing \\
4
  \midrule
5
+ MACE-MPA & 1 & 7 & 1.037074 & 1.005455 & -0.999368 & 0.996332 & 0.993186 & 2 \\
6
+ eSEN & 2 & 15 & 1.042211 & 1.008227 & -0.999330 & 0.996857 & 0.992097 & 5 \\
7
+ MACE-MP(M) & 3 & 20 & 1.042211 & 1.008987 & -0.999330 & 0.994116 & 0.991586 & 5 \\
8
+ MatterSim & 4 & 22 & 1.045135 & 1.006090 & -0.997350 & 0.992790 & 0.988098 & 3 \\
9
+ CHGNet & 5 & 27 & 1.105316 & 1.014753 & -0.996499 & 0.992997 & 0.986642 & 3 \\
10
+ SevenNet & 6 & 32 & 1.109328 & 1.018697 & -0.998128 & 0.988912 & 0.985958 & 3 \\
11
+ M3GNet & 7 & 38 & 1.174874 & 1.017501 & -0.996321 & 0.989743 & 0.980169 & 5 \\
12
+ ORBv2 & 8 & 48 & 1.316213 & 1.037472 & -0.991846 & 0.970143 & 0.963746 & 7 \\
13
  \bottomrule
14
  \end{tabular}
examples/mof/CHGNet.pkl CHANGED
Binary files a/examples/mof/CHGNet.pkl and b/examples/mof/CHGNet.pkl differ
 
examples/mof/M3GNet.pkl CHANGED
Binary files a/examples/mof/M3GNet.pkl and b/examples/mof/M3GNet.pkl differ
 
examples/mof/MACE-MP(M).pkl CHANGED
Binary files a/examples/mof/MACE-MP(M).pkl and b/examples/mof/MACE-MP(M).pkl differ
 
examples/mof/MACE-MPA.pkl CHANGED
Binary files a/examples/mof/MACE-MPA.pkl and b/examples/mof/MACE-MPA.pkl differ
 
examples/mof/MatterSim.pkl CHANGED
Binary files a/examples/mof/MatterSim.pkl and b/examples/mof/MatterSim.pkl differ
 
examples/mof/ORBv2.pkl CHANGED
Binary files a/examples/mof/ORBv2.pkl and b/examples/mof/ORBv2.pkl differ
 
examples/mof/classification/M3GNet.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3e954dba20470846a14796b21dceeac03c02db8613527d441f387063697efe
3
+ size 218426
examples/mof/classification/MACE-MP(M).pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d0967389772b92764ecc9a521d83fefff9f99ac365bbd8ba643bec32313ce57
3
+ size 197302
examples/mof/classification/MACE-MPA.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d8ca57b95e2089ea86e28b3866bc3118e0893832a0be94ca9399002440ea4e3
3
+ size 299928
examples/mof/classification/MatterSim.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7466643aedc9c6ea0b2a34fa9227fa99eeb28c67818de077b7b5edd8d49bf47
3
+ size 298511
examples/mof/classification/ORBv2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c635f37e99782d0d0f2eb413f57285df0ed4666e390a2171f4d4b2b2febf36
3
+ size 248466
examples/mof/classification/SevenNet.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8285152e645f30facb7c53298075fd6b7483149944c98bf52f46bb1c3e62b67
3
+ size 249418
examples/mof/classification/analysis.ipynb ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "# %matplotlib inline\n",
10
+ "\n",
11
+ "# import pandas as pd\n",
12
+ "# import seaborn as sns\n",
13
+ "# from matplotlib import pyplot as plt\n",
14
+ "# from ase import units\n",
15
+ "# from pathlib import Path\n",
16
+ "\n",
17
+ "# from mlip_arena.models import MLIPEnum\n",
18
+ "\n",
19
+ "\n",
20
+ "# color_mapping = {\n",
21
+ "# 'DAC': '#e41a1c', # e.g., red for DAC\n",
22
+ "# 'Flue Gas': '#377eb8', # e.g., blue\n",
23
+ "# 'General': '#4daf4a' # e.g., green\n",
24
+ "# }\n",
25
+ "\n",
26
+ "# for model in MLIPEnum:\n",
27
+ "\n",
28
+ "# fpath = Path(f\"{model.name}.pkl\")\n",
29
+ "\n",
30
+ "# if not fpath.exists():\n",
31
+ "# continue\n",
32
+ "\n",
33
+ "# df = pd.read_pickle(fpath)\n",
34
+ "# df.drop_duplicates(subset=['model', 'name', 'class'], keep='last')\n",
35
+ "\n",
36
+ "# df_exploded = df.explode(['henry_coefficient', 'averaged_interaction_energy', 'heat_of_adsorption'])\n",
37
+ "# df_group = df_exploded.groupby(['model', 'name', 'class'])[['henry_coefficient', 'averaged_interaction_energy', 'heat_of_adsorption']]\n",
38
+ "# df = df_group.mean()\n",
39
+ "\n",
40
+ "# print(model, len(df))\n",
41
+ "\n",
42
+ "# df['averaged_interaction_energy'] = df['averaged_interaction_energy'] * units._e * units._Nav * 1e-3 # Convert from eV to kJ/mol\n",
43
+ "\n",
44
+ "# fig, ax = plt.subplots(figsize=(3, 3))\n",
45
+ "# sns.scatterplot(\n",
46
+ "# data=df,\n",
47
+ "# x=\"averaged_interaction_energy\",\n",
48
+ "# y=\"heat_of_adsorption\",\n",
49
+ "# hue=\"class\",\n",
50
+ "# palette=color_mapping,\n",
51
+ "# ax=ax,\n",
52
+ "# )\n",
53
+ "\n",
54
+ "# ax.set(\n",
55
+ "# title=model.name,\n",
56
+ "# # xlim=(-100, 0),\n",
57
+ "# # ylim=(-100, 0),\n",
58
+ "# xlabel='Averaged Interaction Energy (kJ/mol)',\n",
59
+ "# ylabel='Heat of Adsorption (kJ/mol)',\n",
60
+ "# aspect='equal'\n",
61
+ "# )\n",
62
+ "\n",
63
+ "# # Add horizontal dashed decision boundaries\n",
64
+ "# ax.axhline(y=-50, linestyle='--', color='gray', label='Exp. CO₂ Qst = 50 kJ/mol')\n",
65
+ "# ax.axhline(y=-35, linestyle='--', color='gray', label='Exp. CO₂ Qst = 35 kJ/mol')\n",
66
+ "\n",
67
+ "# ax.legend(\n",
68
+ "# # title='Class',\n",
69
+ "# loc='upper left',\n",
70
+ "# bbox_to_anchor=(1, 1),\n",
71
+ "# frameon=False,\n",
72
+ "# )\n",
73
+ "# plt.show()\n"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "from pathlib import Path\n",
83
+ "\n",
84
+ "import pandas as pd\n",
85
+ "import plotly.colors as pcolors\n",
86
+ "import seaborn as sns\n",
87
+ "from matplotlib import pyplot as plt\n",
88
+ "\n",
89
+ "from mlip_arena.models import MLIPEnum\n",
90
+ "\n",
91
+ "mlip_methods = [\n",
92
+ " model.name\n",
93
+ " for model in MLIPEnum\n",
94
+ "]\n",
95
+ "\n",
96
+ "all_attributes = dir(pcolors.qualitative)\n",
97
+ "color_palettes = {\n",
98
+ " attr: getattr(pcolors.qualitative, attr)\n",
99
+ " for attr in all_attributes\n",
100
+ " if isinstance(getattr(pcolors.qualitative, attr), list)\n",
101
+ "}\n",
102
+ "color_palettes.pop(\"__all__\", None)\n",
103
+ "\n",
104
+ "palette_names = list(color_palettes.keys())\n",
105
+ "palette_colors = list(color_palettes.values())\n",
106
+ "palette_name = \"Plotly\"\n",
107
+ "color_sequence = color_palettes[palette_name] # type: ignore\n",
108
+ "\n",
109
+ "method_color_mapping = {\n",
110
+ " method: color_sequence[i % len(color_sequence)]\n",
111
+ " for i, method in enumerate(mlip_methods)\n",
112
+ "}"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": null,
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "name": "stderr",
122
+ "output_type": "stream",
123
+ "text": [
124
+ "/tmp/ipykernel_447045/3752450599.py:127: UserWarning: Attempt to set non-positive xlim on a log-scaled axis will be ignored.\n",
125
+ " ax1.set_xlim(xmin, xmax)\n"
126
+ ]
127
+ },
128
+ {
129
+ "data": {
130
+ "image/png": "",
131
+ "text/plain": [
132
+ "<Figure size 600x400 with 2 Axes>"
133
+ ]
134
+ },
135
+ "metadata": {},
136
+ "output_type": "display_data"
137
+ }
138
+ ],
139
+ "source": [
140
+ "%matplotlib inline\n",
141
+ "\n",
142
+ "import numpy as np\n",
143
+ "\n",
144
+ "from mlip_arena.models import MLIPEnum\n",
145
+ "\n",
146
+ "# Color mapping by class\n",
147
+ "color_mapping = {\n",
148
+ " \"DAC\": \"#e41a1c\",\n",
149
+ " \"Flue Gas\": \"#377eb8\",\n",
150
+ " \"General\": \"#4daf4a\"\n",
151
+ "}\n",
152
+ "\n",
153
+ "# Decision boundary thresholds\n",
154
+ "thresholds = {\n",
155
+ " \"General\": (None, 35),\n",
156
+ " \"Flue Gas\": (35, 50),\n",
157
+ " \"DAC\": (50, 100)\n",
158
+ "}\n",
159
+ "\n",
160
+ "# Collect data from all models\n",
161
+ "all_data = []\n",
162
+ "margins = []\n",
163
+ "\n",
164
+ "for model in MLIPEnum:\n",
165
+ " fpath = Path(f\"{model.name}.pkl\")\n",
166
+ " if not fpath.exists():\n",
167
+ " continue\n",
168
+ "\n",
169
+ " df = pd.read_pickle(fpath)\n",
170
+ " df = df.drop_duplicates(subset=[\"model\", \"name\", \"class\"], keep=\"last\")\n",
171
+ " df_exploded = df.explode([\"henry_coefficient\", \"averaged_interaction_energy\", \"heat_of_adsorption\"])\n",
172
+ " df_group = df_exploded.groupby([\"model\", \"name\", \"class\"])[[\"henry_coefficient\", \"averaged_interaction_energy\", \"heat_of_adsorption\"]].mean().reset_index()\n",
173
+ "\n",
174
+ " df_group[\"model_name\"] = model.name\n",
175
+ " df_group[\"neg_heat\"] = -df_group[\"heat_of_adsorption\"] # negate for log scale\n",
176
+ " df_group = df_group[df_group[\"neg_heat\"] > 0] # remove invalid values\n",
177
+ "\n",
178
+ " df_group = df_group[df_group[\"name\"] != \"MIL-96-Al\"]\n",
179
+ "\n",
180
+ " all_data.append(df_group)\n",
181
+ "\n",
182
+ " # Compute misclassification margin\n",
183
+ " def point_misclassified(row):\n",
184
+ " val = row[\"neg_heat\"]\n",
185
+ " lower, upper = thresholds[row[\"class\"]]\n",
186
+ " return (lower is not None and val < lower) or (upper is not None and val >= upper)\n",
187
+ "\n",
188
+ " misclassified = df_group[df_group.apply(point_misclassified, axis=1)]\n",
189
+ "\n",
190
+ " def distance_to_boundary(row):\n",
191
+ " val = row[\"neg_heat\"]\n",
192
+ " lower, upper = thresholds[row[\"class\"]]\n",
193
+ " distances = []\n",
194
+ " if lower is not None:\n",
195
+ " distances.append(abs(val - lower))\n",
196
+ " if upper is not None:\n",
197
+ " distances.append(abs(val - upper))\n",
198
+ " return min(distances)\n",
199
+ "\n",
200
+ " if not misclassified.empty:\n",
201
+ " num_misclassified = len(misclassified) + (18 - len(df_group))\n",
202
+ " margin = misclassified.apply(distance_to_boundary, axis=1).mean()\n",
203
+ " else:\n",
204
+ " num_misclassified = 0\n",
205
+ " margin = 0.0\n",
206
+ "\n",
207
+ " margins.append((model.name, margin, num_misclassified))\n",
208
+ "\n",
209
+ "\n",
210
+ "# Combine all into one DataFrame\n",
211
+ "combined_df = pd.concat(all_data, ignore_index=True)\n",
212
+ "margins_df = pd.DataFrame(margins, columns=[\"model_name\", \"misclassification_margin\", \"num_misclassified\"])\n",
213
+ "\n",
214
+ "# --- Plotting ---\n",
215
+ "\n",
216
+ "with plt.style.context(\"default\"):\n",
217
+ "\n",
218
+ " LARGE_SIZE = 10\n",
219
+ " MEDIUM_SIZE = 8\n",
220
+ " SMALL_SIZE = 6\n",
221
+ "\n",
222
+ " plt.rcParams.update({\n",
223
+ " \"font.size\": SMALL_SIZE,\n",
224
+ " \"axes.titlesize\": MEDIUM_SIZE,\n",
225
+ " \"axes.labelsize\": MEDIUM_SIZE,\n",
226
+ " \"xtick.labelsize\": SMALL_SIZE,\n",
227
+ " \"ytick.labelsize\": SMALL_SIZE,\n",
228
+ " \"legend.fontsize\": SMALL_SIZE,\n",
229
+ " \"figure.titlesize\": LARGE_SIZE,\n",
230
+ " })\n",
231
+ "\n",
232
+ " fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 4), sharex=False, gridspec_kw={\"height_ratios\": [3, 1.5]})\n",
233
+ "\n",
234
+ " # --- Main Stripplot ---\n",
235
+ " sns.stripplot(\n",
236
+ " data=combined_df,\n",
237
+ " x=\"neg_heat\",\n",
238
+ " y=\"model_name\",\n",
239
+ " hue=\"class\",\n",
240
+ " size=2,\n",
241
+ " palette=color_mapping,\n",
242
+ " dodge=True,\n",
243
+ " jitter=0.1,\n",
244
+ " alpha=1,\n",
245
+ " ax=ax1,\n",
246
+ " )\n",
247
+ "\n",
248
+ " xmin, xmax = ax1.get_xlim()\n",
249
+ "\n",
250
+ " ax1.axvspan(xmin, 35, color=color_mapping[\"General\"], alpha=0.1, label=\"General\")\n",
251
+ " ax1.axvspan(35, 50, color=color_mapping[\"Flue Gas\"], alpha=0.1, label=\"Flue Gas\")\n",
252
+ " ax1.axvspan(50, 100, color=color_mapping[\"DAC\"], alpha=0.1, label=\"DAC\")\n",
253
+ "\n",
254
+ " ax1.axvline(x=35, linestyle=\"--\", color=\"gray\", label=\"Exp. $\\\\mathregular{CO_2}$ $Q_\\\\text{st}$ = 35 kJ/mol\")\n",
255
+ " ax1.axvline(x=50, linestyle=\"--\", color=\"gray\", label=\"Exp. $\\\\mathregular{CO_2}$ $Q_\\\\text{st}$ = 50 kJ/mol\")\n",
256
+ " ax1.axvline(x=100, linestyle=\"--\", color=\"gray\", label=\"Exp. $\\\\mathregular{CO_2}$ $Q_\\\\text{st}$ = 100 kJ/mol\")\n",
257
+ "\n",
258
+ " ax1.set_xscale(\"log\")\n",
259
+ " ax1.set_xlabel(\"Heat of $\\\\mathregular{CO_2}$ Adsorption $Q_\\\\text{st}$ [kJ/mol]\")\n",
260
+ " ax1.set_ylabel(\"\")\n",
261
+ " ax1.set_xlim(xmin, xmax)\n",
262
+ "\n",
263
+ " yticks = ax1.get_yticks()\n",
264
+ " yticks = np.array(yticks)\n",
265
+ " yticks = yticks[np.isfinite(yticks)] # Remove any NaNs\n",
266
+ "\n",
267
+ " # Draw horizontal lines between models (skip the last one)\n",
268
+ " for y in yticks[:-1] + np.diff(yticks) / 2:\n",
269
+ " ax1.axhline(y=y, color=\"gray\", linestyle=\":\", linewidth=0.7, alpha=0.5, zorder=0)\n",
270
+ "\n",
271
+ " handles, labels = ax1.get_legend_handles_labels()\n",
272
+ " legend_dict = dict(zip(labels, handles, strict=False))\n",
273
+ "\n",
274
+ " desired_order = [\n",
275
+ " \"General\", \"Exp. $\\\\mathregular{CO_2}$ $Q_\\\\text{st}$ = 35 kJ/mol\", \"Flue Gas\",\n",
276
+ " \"Exp. $\\\\mathregular{CO_2}$ $Q_\\\\text{st}$ = 50 kJ/mol\", \"DAC\", \"Exp. $\\\\mathregular{CO_2}$ $Q_\\\\text{st}$ = 100 kJ/mol\"\n",
277
+ " ]\n",
278
+ "\n",
279
+ " ordered_handles = [legend_dict[label] for label in desired_order if label in legend_dict]\n",
280
+ "\n",
281
+ " ax1.legend(\n",
282
+ " ordered_handles,\n",
283
+ " desired_order,\n",
284
+ " loc=\"lower center\",\n",
285
+ " bbox_to_anchor=(0.5, 1),\n",
286
+ " ncol=3,\n",
287
+ " frameon=True\n",
288
+ " )\n",
289
+ "\n",
290
+ "\n",
291
+ " ax1.spines[\"top\"].set_visible(False)\n",
292
+ " ax1.spines[\"right\"].set_visible(False)\n",
293
+ "\n",
294
+ " # --- Misclassification Margin Barplot ---\n",
295
+ "\n",
296
+ " # Sort by error margin\n",
297
+ " margins_df_sorted = margins_df.sort_values(by=\"misclassification_margin\", ascending=True)\n",
298
+ "\n",
299
+ " # Extract color values in order\n",
300
+ " bar_colors = [method_color_mapping[m] for m in margins_df_sorted[\"model_name\"]]\n",
301
+ "\n",
302
+ " sns.scatterplot(\n",
303
+ " data=margins_df_sorted,\n",
304
+ " x=\"num_misclassified\",\n",
305
+ " y=\"misclassification_margin\",\n",
306
+ " hue=\"model_name\",\n",
307
+ " palette=bar_colors,\n",
308
+ " ax=ax2\n",
309
+ " )\n",
310
+ "\n",
311
+ " for _, row in margins_df_sorted.iterrows():\n",
312
+ " x = row[\"num_misclassified\"]\n",
313
+ " y = row[\"misclassification_margin\"]\n",
314
+ " model = row[\"model_name\"]\n",
315
+ " color = bar_colors[margins_df_sorted[\"model_name\"].tolist().index(model)]\n",
316
+ "\n",
317
+ " ax2.text(\n",
318
+ " x+0.1,\n",
319
+ " y,\n",
320
+ " f\"{y:.2f}\",\n",
321
+ " fontsize=SMALL_SIZE,\n",
322
+ " ha=\"left\",\n",
323
+ " va=\"bottom\",\n",
324
+ " color=color,\n",
325
+ " alpha=0.9\n",
326
+ " )\n",
327
+ "\n",
328
+ " ax2.set_ylabel(\"Misclass. margin [kJ/mol]\")\n",
329
+ " ax2.set_xlabel(\"Missing + misclass. count\")\n",
330
+ " ax2.spines[\"top\"].set_visible(False)\n",
331
+ " ax2.spines[\"right\"].set_visible(False)\n",
332
+ " # ax2.set_xticklabels(margins_df_sorted[\"model_name\"], rotation=45)\n",
333
+ " ax2.set_yscale(\"log\")\n",
334
+ "\n",
335
+ " handles, labels = ax2.get_legend_handles_labels()\n",
336
+ " legend_dict = dict(zip(labels, handles, strict=False))\n",
337
+ " ax2.legend(\n",
338
+ " legend_dict.values(),\n",
339
+ " legend_dict.keys(),\n",
340
+ " loc=\"upper left\",\n",
341
+ " bbox_to_anchor=(0, 1),\n",
342
+ " ncol=3,\n",
343
+ " frameon=True\n",
344
+ " )\n",
345
+ "\n",
346
+ " plt.tight_layout()\n",
347
+ " plt.savefig(\"mof-misclassification_margin.pdf\", bbox_inches=\"tight\")\n",
348
+ " plt.show()\n"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "execution_count": null,
354
+ "metadata": {},
355
+ "outputs": [],
356
+ "source": []
357
+ }
358
+ ],
359
+ "metadata": {
360
+ "kernelspec": {
361
+ "display_name": "mlip-arena",
362
+ "language": "python",
363
+ "name": "mlip-arena"
364
+ },
365
+ "language_info": {
366
+ "codemirror_mode": {
367
+ "name": "ipython",
368
+ "version": 3
369
+ },
370
+ "file_extension": ".py",
371
+ "mimetype": "text/x-python",
372
+ "name": "python",
373
+ "nbconvert_exporter": "python",
374
+ "pygments_lexer": "ipython3",
375
+ "version": "3.11.8"
376
+ }
377
+ },
378
+ "nbformat": 4,
379
+ "nbformat_minor": 2
380
+ }
examples/mof/classification/input.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66941e6898e0b0dbef120b2b17bd6828a1cf4db6aad94e1fd83aaa8b21acd84
3
+ size 286336
examples/mof/classification/mof-misclassification_margin.pdf ADDED
Binary file (31.9 kB). View file
 
examples/wbm_ev/ALIGNN_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1d3f3d2992c02464fdc5a4d58ca05de553dffab355b08e99a46d3b6d2495d11
3
- size 368547
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875ed54dbc8766f4cfcdfd5e6d628fca9d1a8866b1b29cd1a32be8bb966303ef
3
+ size 368670
examples/wbm_ev/CHGNet_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50bbb0ed4ba3e8af06c2a90e927182132be1f988643dd7b6844d39fe4dd1084c
3
- size 357683
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fc23bf5bc94ba30c32581a8c57131a9125993a3f0e380cb3800220b197b666
3
+ size 357806
examples/wbm_ev/M3GNet_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:411b2f619314dafb34349a65d985b7b881cb21687ecb9a853da29fc21d6fa714
3
- size 357786
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd34bfe4650c2be26e7cfd75d747885ca265cbbc7fb769222d1f7e304fbd6de3
3
+ size 357909
examples/wbm_ev/MACE-MP(M)_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:877cd9e9407f9402fc01510d051ad19db815e9794c87ee8402f306e2afafd45e
3
- size 359765
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f54e1c2b7ec686c2a353f8092633b13e66fac8410642027ef3481aedf350b0b
3
+ size 359889
examples/wbm_ev/MACE-MPA_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee5167ef38acf130548c5f5cbf78fedd14b7de7df47c72d771997f4e13302c0b
3
- size 356642
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:249e8f0283dda9c87ec787ea0945a92e8ba0d4bf7b00bf823a4850283f06cfde
3
+ size 356765
examples/wbm_ev/MatterSim_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:107a30bb5541a861e4141be0db3de210ca2229a22d975f732f107f0e6afdeb0f
3
- size 356292
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cea2cf80561a7d5f6696fccd45754df9d417bd0310f06827b9c02fc7414f278
3
+ size 356416
examples/wbm_ev/ORBv2_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01525c5b2e37b8d930c94ee2dc257b8d7c36b24111206ba2efc499a8bc172fcd
3
- size 357949
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e92484ae9dc84cf35ce7c4780c9a28d27fb29779fd31c3d494ac95acf54c3e8
3
+ size 358072
examples/wbm_ev/SevenNet_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83d9c45216940262575bc7386c62f8b79bb3ac5e63b8ab33ca4307976bb9796f
3
- size 358345
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d19ee80d1f6c13765ad134d1356a029c175b7c885035fb68380b2dd559645ad
3
+ size 358468
examples/wbm_ev/eSEN.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841febd80ec1024fa186ab05e5f9d7c96a0605d90c4840415ecf41ac89132aee
3
+ size 410695
examples/wbm_ev/eSEN_processed.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1602624f625a7c258f76604718fec7998fa6f88400c33ca2d5116681a5e8dd9d
3
+ size 356216
examples/wbm_ev/eqV2(OMat)_processed.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50f318dce825e939a5c24aa8789acb2952865e0c682462f99cfed204ea9fba64
3
- size 356693
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd43459abbcb6bf33b3dfecf131a6116d799f06aeb2b9e0a6c66e5d5db2ebdd6
3
+ size 356817
examples/wbm_ev/run.py CHANGED
@@ -123,41 +123,41 @@ def submit_tasks():
123
  futures.append(result)
124
  return [f.result(raise_on_failure=False) for f in futures]
125
 
126
-
127
- nodes_per_alloc = 1
128
- gpus_per_alloc = 1
129
- ntasks = 1
130
-
131
- cluster_kwargs = dict(
132
- cores=1,
133
- memory="64 GB",
134
- processes=1,
135
- shebang="#!/bin/bash",
136
- account="m3828",
137
- walltime="00:30:00",
138
- # job_mem="0",
139
- job_script_prologue=[
140
- "source ~/.bashrc",
141
- "module load python",
142
- "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
143
- ],
144
- job_directives_skip=["-n", "--cpus-per-task", "-J"],
145
- job_extra_directives=[
146
- "-J wbm_ev",
147
- "-q debug",
148
- f"-N {nodes_per_alloc}",
149
- "-C gpu",
150
- f"-G {gpus_per_alloc}",
151
- "--exclusive",
152
- ],
153
- )
154
-
155
- cluster = SLURMCluster(**cluster_kwargs)
156
- print(cluster.job_script())
157
- cluster.adapt(minimum_jobs=2, maximum_jobs=2)
158
- client = Client(cluster)
159
-
160
- submit_tasks.with_options(
161
- task_runner=DaskTaskRunner(address=client.scheduler.address),
162
- log_prints=True,
163
- )()
 
123
  futures.append(result)
124
  return [f.result(raise_on_failure=False) for f in futures]
125
 
126
+ if __name__ == "__main__":
127
+ nodes_per_alloc = 1
128
+ gpus_per_alloc = 1
129
+ ntasks = 1
130
+
131
+ cluster_kwargs = dict(
132
+ cores=1,
133
+ memory="64 GB",
134
+ processes=1,
135
+ shebang="#!/bin/bash",
136
+ account="m3828",
137
+ walltime="00:30:00",
138
+ # job_mem="0",
139
+ job_script_prologue=[
140
+ "source ~/.bashrc",
141
+ "module load python",
142
+ "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena",
143
+ ],
144
+ job_directives_skip=["-n", "--cpus-per-task", "-J"],
145
+ job_extra_directives=[
146
+ "-J wbm_ev",
147
+ "-q debug",
148
+ f"-N {nodes_per_alloc}",
149
+ "-C gpu",
150
+ f"-G {gpus_per_alloc}",
151
+ "--exclusive",
152
+ ],
153
+ )
154
+
155
+ cluster = SLURMCluster(**cluster_kwargs)
156
+ print(cluster.job_script())
157
+ cluster.adapt(minimum_jobs=2, maximum_jobs=2)
158
+ client = Client(cluster)
159
+
160
+ submit_tasks.with_options(
161
+ task_runner=DaskTaskRunner(address=client.scheduler.address),
162
+ log_prints=True,
163
+ )()
examples/wbm_ev/summary.csv CHANGED
@@ -1,10 +1,11 @@
1
  model,rank,rank-aggregation,energy-diff-flip-times,tortuosity,spearman-compression-energy,spearman-compression-derivative,spearman-tension-energy,missing
2
- MACE-MPA,1,11,1.0,1.000675741122765,-0.9983393939393939,0.9993090909090908,0.9987181818181818,0
3
- CHGNet,2,14,1.0,1.0006287770651048,-0.9982787878787878,0.9439636363636364,0.999090909090909,0
4
- MatterSim,3,19,1.009,1.000567338639546,-0.9980969696969696,0.9997090909090908,0.9937541835359507,0
5
- eqV2(OMat),4,22,1.035,1.0008346292192054,-0.9982060606060604,0.9972242424242423,0.9986454545454545,0
6
- M3GNet,5,24,1.002,1.0020010929112253,-0.9975878787878787,0.997442424242424,0.9964676571137886,0
7
- ORBv2,6,29,1.058,1.004064906459821,-0.9977696969696969,0.970751515151515,0.9976,0
8
- SevenNet,7,33,1.034,1.0100246177550205,-0.9951636363636364,0.9465575757575757,0.9947048195608054,0
9
- MACE-MP(M),8,35,1.121,1.0807128149289842,-0.9438060606060605,0.9011878787878788,0.9987454545454546,0
10
- ALIGNN,9,46,3.909,1.3756517739089669,-0.8892069391323368,0.7602706775644651,0.862085379002138,0
 
 
1
  model,rank,rank-aggregation,energy-diff-flip-times,tortuosity,spearman-compression-energy,spearman-compression-derivative,spearman-tension-energy,missing
2
+ eSEN,1,7,1.0,1.000402711291021,-0.9983393939393939,0.9999999999999999,0.9990454545454545,0
3
+ MACE-MPA,2,14,1.0,1.000675741122765,-0.9983393939393939,0.9993090909090908,0.9987181818181818,0
4
+ CHGNet,3,17,1.0,1.0006287770651048,-0.9982787878787878,0.9439636363636364,0.999090909090909,0
5
+ MatterSim,4,24,1.009,1.000567338639546,-0.9980969696969696,0.9997090909090908,0.9937541835359507,0
6
+ eqV2(OMat),5,27,1.035,1.0008346292192054,-0.9982060606060604,0.9972242424242423,0.9986454545454545,0
7
+ M3GNet,6,29,1.002,1.0020010929112253,-0.9975878787878787,0.997442424242424,0.9964676571137886,0
8
+ ORBv2,7,34,1.058,1.004064906459821,-0.9977696969696969,0.970751515151515,0.9976,0
9
+ SevenNet,8,38,1.034,1.0100246177550205,-0.9951636363636364,0.9465575757575757,0.9947048195608054,0
10
+ MACE-MP(M),9,40,1.121,1.0807128149289842,-0.9438060606060605,0.9011878787878788,0.9987454545454546,0
11
+ ALIGNN,10,51,3.909,1.3756517739089669,-0.8892069391323368,0.7602706775644651,0.862085379002138,0
examples/wbm_ev/summary.tex CHANGED
@@ -2,14 +2,15 @@
2
  \toprule
3
  model & rank & rank-aggregation & energy-diff-flip-times & tortuosity & spearman-compression-energy & spearman-compression-derivative & spearman-tension-energy & missing \\
4
  \midrule
5
- MACE-MPA & 1 & 11 & 1.000000 & 1.000676 & -0.998339 & 0.999309 & 0.998718 & 0 \\
6
- CHGNet & 2 & 14 & 1.000000 & 1.000629 & -0.998279 & 0.943964 & 0.999091 & 0 \\
7
- MatterSim & 3 & 19 & 1.009000 & 1.000567 & -0.998097 & 0.999709 & 0.993754 & 0 \\
8
- eqV2(OMat) & 4 & 22 & 1.035000 & 1.000835 & -0.998206 & 0.997224 & 0.998645 & 0 \\
9
- M3GNet & 5 & 24 & 1.002000 & 1.002001 & -0.997588 & 0.997442 & 0.996468 & 0 \\
10
- ORBv2 & 6 & 29 & 1.058000 & 1.004065 & -0.997770 & 0.970752 & 0.997600 & 0 \\
11
- SevenNet & 7 & 33 & 1.034000 & 1.010025 & -0.995164 & 0.946558 & 0.994705 & 0 \\
12
- MACE-MP(M) & 8 & 35 & 1.121000 & 1.080713 & -0.943806 & 0.901188 & 0.998745 & 0 \\
13
- ALIGNN & 9 & 46 & 3.909000 & 1.375652 & -0.889207 & 0.760271 & 0.862085 & 0 \\
 
14
  \bottomrule
15
  \end{tabular}
 
2
  \toprule
3
  model & rank & rank-aggregation & energy-diff-flip-times & tortuosity & spearman-compression-energy & spearman-compression-derivative & spearman-tension-energy & missing \\
4
  \midrule
5
+ eSEN & 1 & 7 & 1.000000 & 1.000403 & -0.998339 & 1.000000 & 0.999045 & 0 \\
6
+ MACE-MPA & 2 & 14 & 1.000000 & 1.000676 & -0.998339 & 0.999309 & 0.998718 & 0 \\
7
+ CHGNet & 3 & 17 & 1.000000 & 1.000629 & -0.998279 & 0.943964 & 0.999091 & 0 \\
8
+ MatterSim & 4 & 24 & 1.009000 & 1.000567 & -0.998097 & 0.999709 & 0.993754 & 0 \\
9
+ eqV2(OMat) & 5 & 27 & 1.035000 & 1.000835 & -0.998206 & 0.997224 & 0.998645 & 0 \\
10
+ M3GNet & 6 & 29 & 1.002000 & 1.002001 & -0.997588 & 0.997442 & 0.996468 & 0 \\
11
+ ORBv2 & 7 & 34 & 1.058000 & 1.004065 & -0.997770 & 0.970752 & 0.997600 & 0 \\
12
+ SevenNet & 8 & 38 & 1.034000 & 1.010025 & -0.995164 & 0.946558 & 0.994705 & 0 \\
13
+ MACE-MP(M) & 9 & 40 & 1.121000 & 1.080713 & -0.943806 & 0.901188 & 0.998745 & 0 \\
14
+ ALIGNN & 10 & 51 & 3.909000 & 1.375652 & -0.889207 & 0.760271 & 0.862085 & 0 \\
15
  \bottomrule
16
  \end{tabular}
mlip_arena/models/registry.yaml CHANGED
@@ -211,7 +211,7 @@ eSEN:
211
  module: externals
212
  class: eSEN
213
  family: fairchem
214
- package: fairchem-core==1.9.0
215
  checkpoint: esen_30m_oam.pt
216
  username: fairchem # HF handle
217
  last-update: 2025-04-21
@@ -222,9 +222,11 @@ eSEN:
222
  - Alexandria
223
  gpu-tasks:
224
  - homonuclear-diatomics
 
 
225
  prediction: EFS
226
  nvt: true
227
- npt: false # https://github.com/FAIR-Chem/fairchem/issues/888, https://github.com/atomind-ai/mlip-arena/issues/17
228
  date: 2025-04-14
229
  github: https://github.com/FAIR-Chem/fairchem
230
  doi: https://arxiv.org/abs/2502.12147
 
211
  module: externals
212
  class: eSEN
213
  family: fairchem
214
+ package: fairchem-core==1.10.0
215
  checkpoint: esen_30m_oam.pt
216
  username: fairchem # HF handle
217
  last-update: 2025-04-21
 
222
  - Alexandria
223
  gpu-tasks:
224
  - homonuclear-diatomics
225
+ - wbm_ev
226
+ - eos_bulk
227
  prediction: EFS
228
  nvt: true
229
+ npt: true
230
  date: 2025-04-14
231
  github: https://github.com/FAIR-Chem/fairchem
232
  doi: https://arxiv.org/abs/2502.12147
pyproject.toml CHANGED
@@ -8,7 +8,7 @@ version="0.1.0"
8
  authors=[
9
  {name="Yuan Chiang", email="cyrusyc@lbl.gov"},
10
  ]
11
- description="Fair and transparent benchmark of machine-learned interatomic potentials (MLIPs), beyond basic error metrics"
12
  readme=".github/README.md"
13
  requires-python=">=3.10"
14
  keywords=[
@@ -71,7 +71,7 @@ matgl = [
71
  ]
72
  fairchem = [
73
  "hydra-core",
74
- "fairchem-core@git+https://github.com/facebookresearch/fairchem.git#subdirectory=packages/fairchem-core",
75
  ]
76
  orb = [
77
  "orb-models==0.4.0",
@@ -174,4 +174,4 @@ ignore = [
174
  "PD901",
175
  ]
176
  fixable = ["ALL"]
177
- pydocstyle.convention = "google"
 
8
  authors=[
9
  {name="Yuan Chiang", email="cyrusyc@lbl.gov"},
10
  ]
11
+ description="Fair and transparent benchmark of machine learning interatomic potentials (MLIPs), beyond error-based regression metrics"
12
  readme=".github/README.md"
13
  requires-python=">=3.10"
14
  keywords=[
 
71
  ]
72
  fairchem = [
73
  "hydra-core",
74
+ "fairchem-core==1.10.0",
75
  ]
76
  orb = [
77
  "orb-models==0.4.0",
 
174
  "PD901",
175
  ]
176
  fixable = ["ALL"]
177
+ pydocstyle.convention = "google"