Spaces:

atomind
/

mlip-arena

Running

mlip-arena / mlip_arena /tasks /eos_alloy /flow.py

github-actions[ci]

Clean sync from main branch - 2025-10-16 23:00:12

afe68b4 about 1 month ago

4.12 kB

	from functools import partial
	from pathlib import Path

	import pandas as pd
	from huggingface_hub import hf_hub_download
	from prefect import Task, flow, task
	from prefect.client.schemas.objects import TaskRun
	from prefect.futures import wait
	from prefect.states import State

	from ase.db import connect
	from mlip_arena.data.local import SafeHDFStore
	from mlip_arena.models import REGISTRY, MLIPEnum
	from mlip_arena.tasks.eos import run as EOS


	@task
	def get_atoms_from_db(db_path: Path | str):
	    """Yield every structure stored in an ASE database as an atoms object.

	    If ``db_path`` does not exist locally, the file is requested from the
	    ``atomind/mlip-arena`` dataset repository on the Hugging Face Hub, in
	    the subfolder named after the directory that contains this module.

	    Args:
	        db_path: Path to a local database file, or the file name to
	            download when no such local file exists.

	    Yields:
	        ``row.toatoms()`` for each row returned by ``db.select()``.
	    """
	    db_path = Path(db_path)
	    if not db_path.exists():
	        # No local copy: fetch it from the Hub. The value returned by
	        # hf_hub_download replaces db_path and is what gets opened below.
	        db_path = hf_hub_download(
	            repo_id="atomind/mlip-arena",
	            repo_type="dataset",
	            subfolder=Path(__file__).parent.name,
	            filename=str(db_path),
	        )
	    # Rows are yielded while the connection is still open.
	    with connect(db_path) as db:
	        for row in db.select():
	            yield row.toatoms()


	def save_to_hdf(
	    tsk: Task, run: TaskRun, state: State, fpath: Path | str, table_name: str
	):
	    """Completion hook for the EOS task: persist its result to JSON and HDF5.

	    The hook is best-effort: any error raised while extracting or writing
	    the result is logged (with traceback) and swallowed, so a failed save
	    never propagates out of the hook.

	    Args:
	        tsk: The task the hook is attached to (unused).
	        run: The task run; its state supplies the result and its
	            ``total_run_time`` is stored alongside the data.
	        state: The state passed to the hook (unused; ``run.state`` is
	            consulted instead).
	        fpath: Base path of the HDF5 file. The calculator name is appended
	            to the file stem, giving one file per calculator.
	        table_name: Key of the table inside the HDF5 store to append to.

	    Returns:
	        None. Nothing is written when the run failed or when its result is
	        not a ``dict``.
	    """
	    import logging

	    if run.state.is_failed():
	        return

	    result = run.state.result(raise_on_failure=False)

	    if not isinstance(result, dict):
	        return

	    try:
	        atoms = result["atoms"]
	        # Use .get so a missing key falls through to the name recorded in
	        # the result instead of raising KeyError and skipping the save.
	        # NOTE(review): task_inputs appears to hold upstream references
	        # rather than literal argument values, so for a plain string
	        # argument this is presumably falsy and the fallback is what is
	        # actually used - confirm against the Prefect TaskRun schema.
	        calculator_name = (
	            run.task_inputs.get("calculator_name") or result["calculator_name"]
	        )

	        energies = [float(e) for e in result["eos"]["energies"]]

	        formula = atoms.get_chemical_formula()

	        # Scalar entries are broadcast over the rows defined by the
	        # volume/energy sequences.
	        df = pd.DataFrame(
	            {
	                "method": calculator_name,
	                "formula": formula,
	                "total_run_time": run.total_run_time,
	                "v0": result["v0"],
	                "e0": result["e0"],
	                "b0": result["b0"],
	                "b1": result["b1"],
	                "volume": result["eos"]["volumes"],
	                "energy": energies,
	            }
	        )

	        fpath = Path(fpath)
	        fpath = fpath.with_stem(fpath.stem + f"_{calculator_name}")

	        # A JSON copy goes into a per-family directory next to this module.
	        family_path = Path(__file__).parent / REGISTRY[calculator_name]["family"]
	        family_path.mkdir(parents=True, exist_ok=True)

	        df.to_json(family_path / f"{calculator_name}_{formula}.json", indent=2)

	        with SafeHDFStore(fpath, mode="a") as store:
	            store.append(
	                table_name,
	                df,
	                format="table",
	                data_columns=True,
	                min_itemsize={"formula": 50, "method": 20},
	            )
	    except Exception:
	        # Deliberately broad: a hook failure must not affect the task run,
	        # but keep the exception type and traceback for diagnosis.
	        logging.getLogger(__name__).exception(
	            "Failed to save EOS result to %s (table %r)", fpath, table_name
	        )


	@flow(
	    name="EOS Alloy"
	)
	def run(
	    db_path: Path | str,
	    out_path: Path | str,
	    table_name: str,
	    optimizer="FIRE",
	    optimizer_kwargs=None,
	    filter="FrechetCell",
	    filter_kwargs=None,
	    criterion=dict(fmax=0.1, steps=1000),
	    max_abs_strain=0.20,
	    concurrent=False,
	    cache=True,
	):
	    """Submit an EOS task for every (structure, eligible model) pair.

	    A model is eligible when its REGISTRY entry has a truthy ``"npt"`` value
	    and lists this module's parent directory name under ``"cpu-tasks"`` or
	    ``"gpu-tasks"``. Each EOS task carries a completion hook that saves its
	    result via ``save_to_hdf``.

	    Args:
	        db_path: Database of structures, passed to ``get_atoms_from_db``.
	        out_path: Base HDF5 path handed to the ``save_to_hdf`` hook.
	        table_name: HDF5 table key handed to the ``save_to_hdf`` hook.
	        optimizer: Forwarded to the EOS task.
	        optimizer_kwargs: Forwarded to the EOS task.
	        filter: Forwarded to the EOS task.
	        filter_kwargs: Forwarded to the EOS task.
	        criterion: Forwarded to the EOS task (as a shallow copy when it is
	            a ``dict``).
	        max_abs_strain: Forwarded to the EOS task.
	        concurrent: Forwarded to the EOS task.
	        cache: Forwarded as ``persist_opt`` and ``cache_opt``; when false,
	            the EOS task is also configured with ``refresh_cache=True``.

	    Returns:
	        The results of all submitted tasks whose state is completed.
	    """
	    # The default dict in the signature is shared between calls; forward a
	    # copy so nothing downstream can mutate it.
	    if isinstance(criterion, dict):
	        criterion = dict(criterion)

	    EOS_ = EOS.with_options(
	        on_completion=[partial(save_to_hdf, fpath=out_path, table_name=table_name)],
	        refresh_cache=not cache,
	    )

	    # Model eligibility does not depend on the structure, so resolve it
	    # once instead of once per structure.
	    task_name = Path(__file__).parent.name
	    eligible = []
	    for mlip in MLIPEnum:
	        entry = REGISTRY[mlip.name]
	        if not entry["npt"]:
	            continue
	        if task_name not in (
	            entry.get("cpu-tasks", []) + entry.get("gpu-tasks", [])
	        ):
	            continue
	        eligible.append(mlip.name)

	    futures = []
	    for atoms in get_atoms_from_db(db_path):
	        for calculator_name in eligible:
	            future = EOS_.submit(
	                atoms=atoms,
	                calculator_name=calculator_name,
	                calculator_kwargs=dict(),
	                optimizer=optimizer,
	                optimizer_kwargs=optimizer_kwargs,
	                filter=filter,
	                filter_kwargs=filter_kwargs,
	                criterion=criterion,
	                max_abs_strain=max_abs_strain,
	                concurrent=concurrent,
	                persist_opt=cache,
	                cache_opt=cache,
	            )
	            futures.append(future)

	    wait(futures)

	    # Only completed runs contribute a result; failed ones are skipped.
	    return [
	        f.result(timeout=None, raise_on_failure=False)
	        for f in futures
	        if f.state.is_completed()
	    ]