Spaces:

atomind
/

mlip-arena

Running

mlip-arena / mlip_arena /tasks /combustion /flow.py

github-actions[ci]

Clean sync from main branch - 2025-10-16 23:00:12

afe68b4 about 2 months ago

5.43 kB

	from __future__ import annotations

	from pathlib import Path

	import numpy as np
	import pandas as pd
	from ase import Atoms, io, units
	from ase.calculators.calculator import BaseCalculator
	from ase.neighborlist import NeighborList, natural_cutoffs
	from prefect import flow, task
	from tqdm.auto import tqdm

	from mlip_arena.models import MLIPEnum
	from mlip_arena.tasks.md import run as MD
	from mlip_arena.tasks.utils import get_calculator


	def identify_water_molecules(atoms):
	    """Count oxygen atoms that have exactly two hydrogen neighbours.

	    Neighbours are looked up in an ASE ``NeighborList`` built from
	    ``natural_cutoffs(atoms)`` with ``self_interaction=False`` and
	    ``bothways=True``.

	    Args:
	        atoms: Structure to analyse.

	    Returns:
	        The number of oxygen atoms whose neighbour list contains exactly two
	        hydrogen atoms.
	    """
	    neighbor_list = NeighborList(
	        natural_cutoffs(atoms), self_interaction=False, bothways=True
	    )
	    neighbor_list.update(atoms)

	    n_water = 0
	    for center in atoms:
	        if center.symbol != "O":
	            continue

	        neighbor_ids, _ = neighbor_list.get_neighbors(center.index)
	        n_hydrogen = sum(1 for j in neighbor_ids if atoms[j].symbol == "H")

	        # NOTE(review): only the hydrogen count of each oxygen is checked.
	        # Non-hydrogen neighbours are ignored, and a hydrogen that lies within
	        # the cutoff of two oxygens contributes to both of them.
	        if n_hydrogen == 2:
	            n_water += 1

	    return n_water


	@task
	def get_runtime_stats(traj: list[Atoms], atoms0: Atoms):
	    """Summarise run time and per-frame observables of an MD trajectory.

	    Every frame is expected to carry ``info["restart"]``, ``info["datetime"]``
	    and ``info["step"]``. A frame is skipped as a whole when its potential
	    energy is not finite or when any of its quantities cannot be evaluated, so
	    all per-frame sequences in the result stay aligned with one another. Only
	    the stress is optional: if it cannot be obtained, ``nan`` is recorded as
	    the pressure for that frame.

	    Wall time and step counts are accumulated per restart block (frames that
	    share the same ``info["restart"]`` value) as last-minus-first within the
	    block, so anything that happened between blocks is not counted.

	    Args:
	        traj: Trajectory frames in order. ``traj[1]`` must exist and provide
	            ``info["target_steps"]``.
	        atoms0: Reference structure; its centre of mass is the origin for the
	            drift and its length is reported as ``natoms``.

	    Returns:
	        A dict with the scalar keys ``natoms``, ``total_time_seconds``,
	        ``total_steps``, ``steps_per_second``, ``seconds_per_step``,
	        ``seconds_per_step_per_atom``, ``target_steps`` and ``final_step``,
	        and the per-frame sequences ``energies``, ``kinetic_energies``,
	        ``temperatures``, ``pressures``, ``timestep``, ``com_drifts`` and
	        ``nproducts``.

	    Raises:
	        IndexError: If ``traj`` has fewer than two frames.
	        KeyError: If ``traj[1].info`` has no ``"target_steps"`` entry.
	    """
	    restarts, times, steps = [], [], []
	    Ts, Ps, PEs, KEs = [], [], [], []
	    com_drifts = []
	    nproducts = []

	    # The reference centre of mass does not change between frames.
	    com0 = atoms0.get_center_of_mass()

	    for atoms in tqdm(traj, desc="Analyzing trajectory"):
	        # Evaluate the whole frame into locals first and append afterwards.
	        # Appending while evaluating would leave the parallel lists with
	        # different lengths if a later quantity failed, which breaks the
	        # mask-based indexing below.
	        try:
	            energy = atoms.get_potential_energy()
	            if not np.isfinite(energy):
	                continue

	            restart = atoms.info["restart"]
	            timestamp = atoms.info["datetime"]
	            step = atoms.info["step"]

	            kinetic_energy = atoms.get_kinetic_energy()
	            temperature = atoms.get_temperature()

	            try:
	                # Mean of the first three stress components.
	                pressure = atoms.get_stress()[:3].mean()
	            except Exception:
	                # Stress is best-effort; keep the frame without it.
	                pressure = np.nan

	            drift = (atoms.get_center_of_mass() - com0).tolist()
	            n_water = identify_water_molecules(atoms)
	        except Exception:
	            # Deliberate best-effort: a frame that cannot be analysed is
	            # dropped rather than aborting the whole analysis.
	            continue

	        restarts.append(restart)
	        times.append(timestamp)
	        steps.append(step)
	        PEs.append(energy)
	        KEs.append(kinetic_energy)
	        Ts.append(temperature)
	        Ps.append(pressure)
	        com_drifts.append(drift)
	        nproducts.append(n_water)

	    restarts = np.array(restarts)
	    times = np.array(times)
	    steps = np.array(steps)

	    total_time_seconds = 0
	    total_steps = 0

	    # Sum elapsed wall time and advanced steps block by block.
	    for block in np.unique(restarts):
	        in_block = restarts == block
	        block_times = times[in_block]
	        block_steps = steps[in_block]
	        total_time_seconds += (block_times[-1] - block_times[0]).total_seconds()
	        total_steps += block_steps[-1] - block_steps[0]

	    # Taken from the second frame of the trajectory, so at least two frames
	    # are required.
	    target_steps = traj[1].info["target_steps"]
	    natoms = len(atoms0)

	    if total_time_seconds != 0:
	        steps_per_second = total_steps / total_time_seconds
	    else:
	        steps_per_second = 0

	    if total_steps != 0:
	        seconds_per_step = total_time_seconds / total_steps
	        seconds_per_step_per_atom = total_time_seconds / total_steps / natoms
	    else:
	        seconds_per_step = float("inf")
	        seconds_per_step_per_atom = float("inf")

	    return {
	        "natoms": natoms,
	        "total_time_seconds": total_time_seconds,
	        "total_steps": total_steps,
	        "steps_per_second": steps_per_second,
	        "seconds_per_step": seconds_per_step,
	        "seconds_per_step_per_atom": seconds_per_step_per_atom,
	        "energies": PEs,
	        "kinetic_energies": KEs,
	        "temperatures": Ts,
	        "pressures": Ps,
	        "target_steps": target_steps,
	        "final_step": steps[-1] if len(steps) != 0 else 0,
	        "timestep": steps,
	        "com_drifts": com_drifts,
	        "nproducts": nproducts,
	    }


	@flow
	def hydrogen_combustion(model: str | BaseCalculator, run_dir: Path):
	    """Run hydrogen combustion simulation and analyze results.

	    The starting structure is read from ``H256O128.extxyz`` next to this file.
	    An MD run is started through ``MD`` with the settings below, the written
	    trajectory is read back and analysed with ``get_runtime_stats``, and a
	    one-record JSON summary is written next to the trajectory.

	    Args:
	        model: Either the name of an ``MLIPEnum`` member or a calculator
	            instance to use directly.
	        run_dir: Directory in which the ``.traj`` and ``.json`` files are
	            written, both named ``{model_name}_{formula}``.

	    Returns:
	        Whatever the ``MD`` call returns.

	    Raises:
	        AssertionError: If the input file does not yield a single ``Atoms``
	            object, the trajectory has fewer than 2000 frames, or its first
	            frame does not match the initial positions.
	    """
	    atoms = io.read(Path(__file__).parent / "H256O128.extxyz")
	    assert isinstance(atoms, Atoms)

	    model = MLIPEnum[model] if isinstance(model, str) else model
	    calculator = get_calculator(model) if isinstance(model, MLIPEnum) else model

	    if isinstance(model, MLIPEnum):
	        model_name = model.name
	    else:
	        model_name = calculator.__class__.__name__

	    # Computed once and reused for both file names and the summary record.
	    formula = atoms.get_chemical_formula()
	    traj_file = run_dir / f"{model_name}_{formula}.traj"
	    json_fpath = run_dir / f"{model_name}_{formula}.json"

	    # NOTE(review): units and meaning of total_time, the temperature list and
	    # traj_interval are defined by mlip_arena.tasks.md.run - confirm there.
	    result = MD(
	        atoms=atoms,
	        calculator=calculator,
	        ensemble="nvt",
	        dynamics="nose-hoover",
	        time_step=None,
	        dynamics_kwargs={"ttime": 25 * units.fs, "pfactor": None},
	        total_time=1_000_000,
	        temperature=[300, 3000, 3000, 300],
	        pressure=None,
	        velocity_seed=0,
	        traj_file=traj_file,
	        traj_interval=1000,
	        restart=True,
	    )

	    traj = io.read(traj_file, index=":")

	    assert len(traj) >= 2000, (
	        f"Trajectory has only {len(traj)} frames and is not complete."
	    )

	    assert np.allclose(traj[0].positions, atoms.positions), "Initial positions do not match."

	    stats = get_runtime_stats(traj, atoms0=traj[0])

	    reaction = "hydrogen"
	    max_nproducts = 128  # Maximum possible number of water molecules (for H256O128)

	    data = {
	        "formula": formula,
	        "method": model_name,
	        "reaction": reaction,
	        **stats,
	        # Fraction of the maximum water count present in the last analysed frame.
	        "yield": stats["nproducts"][-1] / max_nproducts,
	    }

	    df = pd.DataFrame([data])
	    df.to_json(json_fpath, orient="records")

	    return result