Spaces:

atomind
/

mlip-arena

Running

App Files Files Community

Yuan (Cyrus) Chiang commited on 20 days ago

Commit

51638da

•

1 Parent(s): aeaedb4

refactor eos into task (#28)

Browse files

* refactor eos into task

* task name and cache

* avoid duplicate test on PR branches

Files changed (8) hide show

.github/README.md +1 -1
.github/workflows/test.yaml +5 -1
mlip_arena/tasks/{eos/run.py → eos.py} +14 -23
mlip_arena/tasks/eos/__init__.py +0 -0
mlip_arena/tasks/eos/run.ipynb +0 -600
mlip_arena/tasks/md.py +29 -13
mlip_arena/tasks/optimize.py +24 -14
tests/test_eos.py +28 -17

.github/README.md CHANGED Viewed

@@ -53,7 +53,7 @@ streamlit run serve/app.py
 > The following are some tasks implemented:
 > - [Prefect structure optimization (OPT)](../mlip_arena/tasks/optimize.py)
 > - [Prefect molecular dynamics (MD)](../mlip_arena/tasks/md.py)
-> - [Prefect equation of states (EOS)](../mlip_arena/tasks/eos/run.py)
 1. Follow the task template to implement the task class and upload the script along with metadata to the MLIP Arena [here](../mlip_arena/tasks/README.md).
 2. Code a benchmark script to evaluate the performance of your model on the task. The script should be able to load the model and the dataset, and output the evaluation metrics.

 > The following are some tasks implemented:
 > - [Prefect structure optimization (OPT)](../mlip_arena/tasks/optimize.py)
 > - [Prefect molecular dynamics (MD)](../mlip_arena/tasks/md.py)
+> - [Prefect equation of states (EOS)](../mlip_arena/tasks/eos.py)
 1. Follow the task template to implement the task class and upload the script along with metadata to the MLIP Arena [here](../mlip_arena/tasks/README.md).
 2. Code a benchmark script to evaluate the performance of your model on the task. The script should be able to load the model and the dataset, and output the evaluation metrics.

.github/workflows/test.yaml CHANGED Viewed

@@ -1,6 +1,10 @@
 name: Python Test
-on: [push, pull_request]
 # env:
 #   UV_SYSTEM_PYTHON: 1

 name: Python Test
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
 # env:
 #   UV_SYSTEM_PYTHON: 1

mlip_arena/tasks/{eos/run.py → eos.py} RENAMED Viewed

@@ -1,5 +1,5 @@
 """
-Define equation of state flows.
 https://github.com/materialsvirtuallab/matcalc/blob/main/matcalc/eos.py
 """
@@ -9,36 +9,25 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 import numpy as np
 from ase import Atoms
 from ase.filters import *  # type: ignore
 from ase.optimize import *  # type: ignore
 from ase.optimize.optimize import Optimizer
-from prefect import flow
-from prefect.futures import wait
-from prefect.runtime import flow_run, task_run
-from pymatgen.analysis.eos import BirchMurnaghan
 from mlip_arena.models import MLIPEnum
 from mlip_arena.tasks.optimize import run as OPT
 if TYPE_CHECKING:
     from ase.filters import Filter
-def generate_flow_run_name():
-    flow_name = flow_run.flow_name
-    parameters = flow_run.parameters
-    atoms = parameters["atoms"]
-    calculator_name = parameters["calculator_name"]
-    return f"{flow_name}: {atoms.get_chemical_formula()} - {calculator_name}"
-def generate_task_run_name():
     task_name = task_run.task_name
     parameters = task_run.parameters
     atoms = parameters["atoms"]
@@ -47,10 +36,12 @@ def generate_task_run_name():
     return f"{task_name}: {atoms.get_chemical_formula()} - {calculator_name}"
-# https://docs.prefect.io/3.0/develop/write-tasks#custom-retry-behavior
-# @task(task_run_name=generate_task_run_name)
-@flow(flow_run_name=generate_flow_run_name, validate_parameters=False)
-def fit(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
     calculator_kwargs: dict | None,

 """
+Define equation of state task.
 https://github.com/materialsvirtuallab/matcalc/blob/main/matcalc/eos.py
 """
 from typing import TYPE_CHECKING
 import numpy as np
+from prefect import task
+from prefect.futures import wait
+from prefect.runtime import task_run
+from prefect.tasks import task_input_hash
 from ase import Atoms
 from ase.filters import *  # type: ignore
 from ase.optimize import *  # type: ignore
 from ase.optimize.optimize import Optimizer
 from mlip_arena.models import MLIPEnum
 from mlip_arena.tasks.optimize import run as OPT
+from pymatgen.analysis.eos import BirchMurnaghan
 if TYPE_CHECKING:
     from ase.filters import Filter
+def _generate_task_run_name():
     task_name = task_run.task_name
     parameters = task_run.parameters
     atoms = parameters["atoms"]
     return f"{task_name}: {atoms.get_chemical_formula()} - {calculator_name}"
+@task(
+    name="EOS",
+    task_run_name=_generate_task_run_name,
+    cache_key_fn=task_input_hash,
+)
+def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
     calculator_kwargs: dict | None,

mlip_arena/tasks/eos/__init__.py DELETED Viewed

File without changes

mlip_arena/tasks/eos/run.ipynb DELETED Viewed

@@ -1,600 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No module named 'deepmd'\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import os\n",
-    "# from mp_api.client import MPRester\n",
-    "from dask.distributed import Client\n",
-    "from dask_jobqueue import SLURMCluster\n",
-    "from prefect import task, flow\n",
-    "from prefect.task_runners import ThreadPoolTaskRunner\n",
-    "from prefect_dask import DaskTaskRunner\n",
-    "from pymatgen.core.structure import Structure\n",
-    "from dotenv import load_dotenv\n",
-    "from ase import Atoms\n",
-    "from ase.io import write, read\n",
-    "from pathlib import Path\n",
-    "import pandas as pd\n",
-    "from prefect.futures import wait\n",
-    "\n",
-    "from mlip_arena.tasks.eos.run import fit as EOS\n",
-    "from mlip_arena.models import MLIPEnum\n",
-    "\n",
-    "load_dotenv()\n",
-    "\n",
-    "# MP_API_KEY = os.environ.get(\"MP_API_KEY\", None)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "MP Database version: 2023.11.1\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bb6c1969c89840888c556f8fa59b4a67",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Retrieving SummaryDoc documents:   0%|          | 0/5135 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "\n",
-    "with MPRester(MP_API_KEY) as mpr:\n",
-    "    print(\"MP Database version:\", mpr.get_database_version())\n",
-    "\n",
-    "    summary_docs = mpr.materials.summary.search(\n",
-    "        num_elements=(1, 2),\n",
-    "        is_stable=True,\n",
-    "        fields=[\"material_id\", \"structure\", \"formula_pretty\"]\n",
-    "    )\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "atoms_list = []\n",
-    "\n",
-    "for doc in summary_docs:\n",
-    "\n",
-    "    structure = doc.structure\n",
-    "    assert isinstance(structure, Structure)\n",
-    "\n",
-    "    atoms = structure.to_ase_atoms()\n",
-    "\n",
-    "    atoms_list.append(atoms)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "write(\"all.extxyz\", atoms_list)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "atoms_list = read(\"all.extxyz\", index=':')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "#!/bin/bash\n",
-      "\n",
-      "#SBATCH -A matgen\n",
-      "#SBATCH --mem=0\n",
-      "#SBATCH -t 00:30:00\n",
-      "#SBATCH -N 1\n",
-      "#SBATCH -G 4\n",
-      "#SBATCH -q debug\n",
-      "#SBATCH -C gpu\n",
-      "#SBATCH -J eos\n",
-      "source ~/.bashrc\n",
-      "module load python\n",
-      "source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena\n",
-      "/pscratch/sd/c/cyrusyc/.conda/mlip-arena/bin/python -m distributed.cli.dask_worker tcp://128.55.64.49:36289 --name dummy-name --nthreads 1 --memory-limit 59.60GiB --nanny --death-timeout 60\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "nodes_per_alloc = 1\n",
-    "gpus_per_alloc = 4\n",
-    "ntasks = 1\n",
-    "\n",
-    "cluster_kwargs = {\n",
-    "    \"cores\": 1,\n",
-    "    \"memory\": \"64 GB\",\n",
-    "    \"shebang\": \"#!/bin/bash\",\n",
-    "    \"account\": \"matgen\",\n",
-    "    \"walltime\": \"00:30:00\",\n",
-    "    \"job_mem\": \"0\",\n",
-    "    \"job_script_prologue\": [\n",
-    "        \"source ~/.bashrc\",\n",
-    "        \"module load python\",\n",
-    "        \"source activate /pscratch/sd/c/cyrusyc/.conda/mlip-arena\",\n",
-    "    ],\n",
-    "    \"job_directives_skip\": [\"-n\", \"--cpus-per-task\", \"-J\"],\n",
-    "    \"job_extra_directives\": [f\"-N {nodes_per_alloc}\", f\"-G {gpus_per_alloc}\", \"-q debug\", \"-C gpu\", \"-J eos\"],\n",
-    "}\n",
-    "cluster = SLURMCluster(**cluster_kwargs)\n",
-    "\n",
-    "print(cluster.job_script())\n",
-    "cluster.adapt(minimum_jobs=2, maximum_jobs=2)\n",
-    "client = Client(cluster)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "from prefect.concurrency.sync import concurrency\n",
-    "from prefect.runtime import flow_run, task_run\n",
-    "\n",
-    "def postprocess(output, model: str, formula: str):\n",
-    "    row = {\n",
-    "        \"formula\": formula,\n",
-    "        \"method\": model,\n",
-    "        \"volumes\": output[\"eos\"][\"volumes\"],\n",
-    "        \"energies\": output[\"eos\"][\"energies\"],\n",
-    "        \"K\": output[\"K\"],\n",
-    "    }\n",
-    "\n",
-    "    fpath = Path(REGISTRY[model][\"family\"]) / f\"{model}.parquet\"\n",
-    "\n",
-    "    if not fpath.exists():\n",
-    "        fpath.parent.mkdir(parents=True, exist_ok=True)\n",
-    "        df = pd.DataFrame([row])  # Convert the dictionary to a DataFrame with a list\n",
-    "    else:\n",
-    "        df = pd.read_parquet(fpath)\n",
-    "        new_row = pd.DataFrame([row])  # Convert dictionary to DataFrame with a list\n",
-    "        df = pd.concat([df, new_row], ignore_index=True)\n",
-    "\n",
-    "    df.drop_duplicates(subset=[\"formula\", \"method\"], keep='last', inplace=True)\n",
-    "    df.to_parquet(fpath)\n",
-    "\n",
-    "\n",
-    "\n",
-    "task_runner = DaskTaskRunner(address=client.scheduler.address)\n",
-    "EOS = EOS.with_options(\n",
-    "    # task_runner=task_runner, \n",
-    "    log_prints=True,\n",
-    "    timeout_seconds=120, \n",
-    "    # result_storage=None\n",
-    ")\n",
-    "\n",
-    "from prefect import get_client\n",
-    "\n",
-    "async with get_client() as client:\n",
-    "    limit_id = await client.create_concurrency_limit(\n",
-    "        tag=\"bottleneck\", \n",
-    "        concurrency_limit=2\n",
-    "    )\n",
-    "\n",
-    "def generate_task_run_name():\n",
-    "    task_name = task_run.task_name\n",
-    "\n",
-    "    parameters = task_run.parameters\n",
-    "\n",
-    "    atoms = parameters[\"atoms\"]\n",
-    "    \n",
-    "    return f\"{task_name}: {atoms.get_chemical_formula()}\"\n",
-    "\n",
-    "@task(task_run_name=generate_task_run_name, tags=[\"bottleneck\"], timeout_seconds=150)\n",
-    "def fit_one(atoms: Atoms, model: str):\n",
-    "    \n",
-    "    eos = EOS(\n",
-    "        atoms=atoms,\n",
-    "        calculator_name=model,\n",
-    "        calculator_kwargs={},\n",
-    "        device=None,\n",
-    "        optimizer=\"QuasiNewton\",\n",
-    "        optimizer_kwargs=None,\n",
-    "        filter=\"FrechetCell\",\n",
-    "        filter_kwargs=None,\n",
-    "        criterion=dict(\n",
-    "            fmax=0.1,\n",
-    "        ),\n",
-    "        max_abs_strain=0.1,\n",
-    "        npoints=7,\n",
-    "    )\n",
-    "    if isinstance(eos, dict):\n",
-    "        postprocess(output=eos, model=model, formula=atoms.get_chemical_formula())\n",
-    "        eos[\"method\"] = model\n",
-    "    \n",
-    "    return eos\n",
-    "    \n",
-    "#https://docs-3.prefect.io/3.0/develop/task-runners#use-multiple-task-runners\n",
-    "# @flow(task_runner=ThreadPoolTaskRunner(max_workers=50), log_prints=True)\n",
-    "@flow(task_runner=task_runner, log_prints=True)\n",
-    "def fit_all(atoms_list: list[Atoms]):\n",
-    "    \n",
-    "    futures = []\n",
-    "    for atoms in atoms_list:\n",
-    "        futures_per_atoms = []\n",
-    "        for model in MLIPEnum:\n",
-    "            \n",
-    "            # with concurrency(\"bottleneck\", occupy=2):\n",
-    "            future = fit_one.submit(atoms, model.name)\n",
-    "            # if not futures_per_atoms:\n",
-    "            #     if not futures:\n",
-    "            #         future = fit_one.submit(atoms, model.name)\n",
-    "            #     else:\n",
-    "            #         future = fit_one.submit(atoms, model.name, wait_for=[futures[-1]])                    \n",
-    "            # else:\n",
-    "            #     future = fit_one.submit(atoms, model.name, wait_for=[future])\n",
-    "            futures_per_atoms.append(future)\n",
-    "            \n",
-    "        futures.extend(futures_per_atoms)\n",
-    "\n",
-    "    return [f.result() for f in futures]\n",
-    "\n",
-    "\n",
-    "# @task(task_run_name=generate_task_run_name, result_storage=None)\n",
-    "# def fit_one(atoms: Atoms):\n",
-    "    \n",
-    "#     outputs = []\n",
-    "#     for model in MLIPEnum:\n",
-    "#         try:\n",
-    "#             eos = EOS(\n",
-    "#                 atoms=atoms,\n",
-    "#                 calculator_name=model.name,\n",
-    "#                 calculator_kwargs={},\n",
-    "#                 device=None,\n",
-    "#                 optimizer=\"QuasiNewton\",\n",
-    "#                 optimizer_kwargs=None,\n",
-    "#                 filter=\"FrechetCell\",\n",
-    "#                 filter_kwargs=None,\n",
-    "#                 criterion=dict(\n",
-    "#                     fmax=0.1,\n",
-    "#                 ),\n",
-    "#                 max_abs_strain=0.1,\n",
-    "#                 npoints=7,\n",
-    "#             )\n",
-    "#             if isinstance(eos, dict):\n",
-    "#                 postprocess(output=eos, model=model.name, formula=atoms.get_chemical_formula())\n",
-    "#                 eos[\"method\"] = model.name\n",
-    "#                 outputs.append(eos)\n",
-    "#         except:\n",
-    "#             continue\n",
-    "    \n",
-    "#     return outputs\n",
-    "\n",
-    "# # https://orion-docs.prefect.io/latest/concepts/task-runners/#using-multiple-task-runners\n",
-    "# @flow(task_runner=DaskTaskRunner(address=client.scheduler.address), log_prints=True, result_storage=None)\n",
-    "# def fit_all(atoms_list: list[Atoms]):\n",
-    "    \n",
-    "#     futures = []\n",
-    "#     for atoms in atoms_list:\n",
-    "#         future = fit_one.submit(atoms)\n",
-    "#         futures.append(future)\n",
-    "            \n",
-    "#     wait(futures)\n",
-    "    \n",
-    "#     return [f.result(raise_on_failure=False) for f in futures]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true,
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">18:53:47.335 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span>    | prefect.engine - Created flow run<span style=\"color: #800080; text-decoration-color: #800080\"> 'vengeful-malkoha'</span> for flow<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> 'fit-all'</span>\n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "18:53:47.335 | \u001b[36mINFO\u001b[0m    | prefect.engine - Created flow run\u001b[35m 'vengeful-malkoha'\u001b[0m for flow\u001b[1;35m 'fit-all'\u001b[0m\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">18:53:47.341 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span>    | prefect.engine - View at <span style=\"color: #0000ff; text-decoration-color: #0000ff\">https://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/909d2bc4-695f-4eeb-8b7c-7660397a0692</span>\n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "18:53:47.341 | \u001b[36mINFO\u001b[0m    | prefect.engine - View at \u001b[94mhttps://app.prefect.cloud/account/f7d40474-9362-4bfa-8950-ee6a43ec00f3/workspace/d4bb0913-5f5e-49f7-bfc5-06509088baeb/runs/flow-run/909d2bc4-695f-4eeb-8b7c-7660397a0692\u001b[0m\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">18:53:47.654 | <span style=\"color: #008080; text-decoration-color: #008080\">INFO</span>    | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(df8c3d55, 'tcp://128.55.64.49:36289', workers=0, threads=0, memory=0 B)\n",
-       "</pre>\n"
-      ],
-      "text/plain": [
-       "18:53:47.654 | \u001b[36mINFO\u001b[0m    | prefect.task_runner.dask - Connecting to existing Dask cluster SLURMCluster(df8c3d55, 'tcp://128.55.64.49:36289', workers=0, threads=0, memory=0 B)\n"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "fit_all(atoms_list)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "```\n",
-    "Note that, because the DaskTaskRunner uses multiprocessing, calls to flows in scripts must be guarded with if __name__ == \"__main__\": or you will encounter warnings and errors.\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# import os\n",
-    "# import tempfile\n",
-    "# import shutil\n",
-    "# from contextlib import contextmanager\n",
-    "\n",
-    "# @contextmanager\n",
-    "# def twd():\n",
-    "    \n",
-    "#     pwd = os.getcwd()\n",
-    "#     temp_dir = tempfile.mkdtemp()\n",
-    "    \n",
-    "#     try:\n",
-    "#         os.chdir(temp_dir)\n",
-    "#         yield\n",
-    "#     finally:\n",
-    "#         os.chdir(pwd)\n",
-    "#         shutil.rmtree(temp_dir)\n",
-    "\n",
-    "# with twd():\n",
-    "\n",
-    "# fit_all(atoms_list)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "df = pd.read_parquet('mace-mp/MACE-MP(M).parquet')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>formula</th>\n",
-       "      <th>method</th>\n",
-       "      <th>volumes</th>\n",
-       "      <th>energies</th>\n",
-       "      <th>K</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Ac2O3</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[82.36010147441682, 85.41047560309894, 88.4608...</td>\n",
-       "      <td>[-39.47541427612305, -39.65580749511719, -39.7...</td>\n",
-       "      <td>95.755459</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Ac6In2</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[278.3036976131417, 288.61124196918433, 298.91...</td>\n",
-       "      <td>[-31.21324348449707, -31.40914535522461, -31.5...</td>\n",
-       "      <td>33.370214</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>Ac6Tl2</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[278.30267000598286, 288.6101763025008, 298.91...</td>\n",
-       "      <td>[-29.572534561157227, -29.833026885986328, -30...</td>\n",
-       "      <td>29.065081</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>Ac3Sn</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[135.293532345587, 140.30440391394214, 145.315...</td>\n",
-       "      <td>[-17.135194778442383, -17.228239059448242, -17...</td>\n",
-       "      <td>30.622045</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>AcAg</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[55.376437498321394, 57.4274166649259, 59.4783...</td>\n",
-       "      <td>[-7.274301528930664, -7.346108913421631, -7.39...</td>\n",
-       "      <td>40.212164</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>Ac4</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[166.09086069175856, 172.2423740507126, 178.39...</td>\n",
-       "      <td>[-16.326059341430664, -16.406923294067383, -16...</td>\n",
-       "      <td>25.409891</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>Ac16S24</td>\n",
-       "      <td>MACE-MP(M)</td>\n",
-       "      <td>[1006.5670668063424, 1043.84732853991, 1081.12...</td>\n",
-       "      <td>[-249.4179229736328, -250.7970733642578, -251....</td>\n",
-       "      <td>61.734158</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   formula      method                                            volumes  \\\n",
-       "1    Ac2O3  MACE-MP(M)  [82.36010147441682, 85.41047560309894, 88.4608...   \n",
-       "2   Ac6In2  MACE-MP(M)  [278.3036976131417, 288.61124196918433, 298.91...   \n",
-       "3   Ac6Tl2  MACE-MP(M)  [278.30267000598286, 288.6101763025008, 298.91...   \n",
-       "4    Ac3Sn  MACE-MP(M)  [135.293532345587, 140.30440391394214, 145.315...   \n",
-       "5     AcAg  MACE-MP(M)  [55.376437498321394, 57.4274166649259, 59.4783...   \n",
-       "6      Ac4  MACE-MP(M)  [166.09086069175856, 172.2423740507126, 178.39...   \n",
-       "7  Ac16S24  MACE-MP(M)  [1006.5670668063424, 1043.84732853991, 1081.12...   \n",
-       "\n",
-       "                                            energies          K  \n",
-       "1  [-39.47541427612305, -39.65580749511719, -39.7...  95.755459  \n",
-       "2  [-31.21324348449707, -31.40914535522461, -31.5...  33.370214  \n",
-       "3  [-29.572534561157227, -29.833026885986328, -30...  29.065081  \n",
-       "4  [-17.135194778442383, -17.228239059448242, -17...  30.622045  \n",
-       "5  [-7.274301528930664, -7.346108913421631, -7.39...  40.212164  \n",
-       "6  [-16.326059341430664, -16.406923294067383, -16...  25.409891  \n",
-       "7  [-249.4179229736328, -250.7970733642578, -251....  61.734158  "
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "mlip-arena",
-   "language": "python",
-   "name": "mlip-arena"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.10"
-  },
-  "widgets": {
-   "application/vnd.jupyter.widget-state+json": {
-    "state": {},
-    "version_major": 2,
-    "version_minor": 0
-   }
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

mlip_arena/tasks/md.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Define molecular dynamics tasks.
 This script has been adapted from Atomate2 MLFF MD workflow written by Aaron Kaplan and Yuan Chiang
 https://github.com/materialsproject/atomate2/blob/main/src/atomate2/forcefields/md.py
@@ -60,6 +60,14 @@ from pathlib import Path
 from typing import Literal
 import numpy as np
 from ase import Atoms, units
 from ase.calculators.calculator import Calculator
 from ase.calculators.mixing import SumCalculator
@@ -77,13 +85,6 @@ from ase.md.velocitydistribution import (
     ZeroRotation,
 )
 from ase.md.verlet import VelocityVerlet
-from prefect import task
-from prefect.tasks import task_input_hash
-from scipy.interpolate import interp1d
-from scipy.linalg import schur
-from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
-from tqdm.auto import tqdm
 from mlip_arena.models import MLIPEnum
 from mlip_arena.models.utils import get_freer_device
@@ -186,7 +187,22 @@ def _get_ensemble_defaults(
     return ase_md_kwargs
-@task(cache_key_fn=task_input_hash, cache_expiration=timedelta(days=1))
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
@@ -196,10 +212,10 @@ def run(
     device: str | None = None,
     ensemble: Literal["nve", "nvt", "npt"] = "nvt",
     dynamics: str | MolecularDynamics = "langevin",
-    time_step: float | None = None, # fs
     total_time: float = 1000,  # fs
-    temperature: float | Sequence | np.ndarray | None = 300.0, # K
-    pressure: float | Sequence | np.ndarray | None = None, # eV/A^3
     ase_md_kwargs: dict | None = None,
     md_velocity_seed: int | None = None,
     zero_linear_momentum: bool = True,
@@ -282,7 +298,7 @@ def run(
         raise ValueError(f"Invalid dynamics: {dynamics}")
     if md_class is NPT:
-        #  Note that until md_func is instantiated, isinstance(md_func,NPT) is False
         # ASE NPT implementation requires upper triangular cell
         u, _ = schur(atoms.get_cell(complete=True), output="complex")
         atoms.set_cell(u.real, scale_atoms=True)

 """
+Define molecular dynamics task.
 This script has been adapted from Atomate2 MLFF MD workflow written by Aaron Kaplan and Yuan Chiang
 https://github.com/materialsproject/atomate2/blob/main/src/atomate2/forcefields/md.py
 from typing import Literal
 import numpy as np
+from prefect import task
+from prefect.runtime import task_run
+from prefect.tasks import task_input_hash
+from scipy.interpolate import interp1d
+from scipy.linalg import schur
+from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
+from tqdm.auto import tqdm
 from ase import Atoms, units
 from ase.calculators.calculator import Calculator
 from ase.calculators.mixing import SumCalculator
     ZeroRotation,
 )
 from ase.md.verlet import VelocityVerlet
 from mlip_arena.models import MLIPEnum
 from mlip_arena.models.utils import get_freer_device
     return ase_md_kwargs
+def _generate_task_run_name():
+    task_name = task_run.task_name
+    parameters = task_run.parameters
+    atoms = parameters["atoms"]
+    calculator_name = parameters["calculator_name"]
+    return f"{task_name}: {atoms.get_chemical_formula()} - {calculator_name}"
+@task(
+    name="MD",
+    task_run_name=_generate_task_run_name,
+    cache_key_fn=task_input_hash,
+    # cache_expiration=timedelta(days=1)
+)
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
     device: str | None = None,
     ensemble: Literal["nve", "nvt", "npt"] = "nvt",
     dynamics: str | MolecularDynamics = "langevin",
+    time_step: float | None = None,  # fs
     total_time: float = 1000,  # fs
+    temperature: float | Sequence | np.ndarray | None = 300.0,  # K
+    pressure: float | Sequence | np.ndarray | None = None,  # eV/A^3
     ase_md_kwargs: dict | None = None,
     md_velocity_seed: int | None = None,
     zero_linear_momentum: bool = True,
         raise ValueError(f"Invalid dynamics: {dynamics}")
     if md_class is NPT:
+        # Note that until md_func is instantiated, isinstance(md_func,NPT) is False
         # ASE NPT implementation requires upper triangular cell
         u, _ = schur(atoms.get_cell(complete=True), output="complex")
         atoms.set_cell(u.real, scale_atoms=True)

mlip_arena/tasks/optimize.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""
 Define structure optimization tasks.
 """
@@ -6,6 +6,11 @@ from __future__ import annotations
 from datetime import timedelta
 from ase import Atoms
 from ase.calculators.calculator import Calculator
 from ase.calculators.mixing import SumCalculator
@@ -13,10 +18,6 @@ from ase.filters import *  # type: ignore
 from ase.filters import Filter
 from ase.optimize import *  # type: ignore
 from ase.optimize.optimize import Optimizer
-from prefect import task
-from prefect.tasks import task_input_hash
-from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 from mlip_arena.models import MLIPEnum
 from mlip_arena.models.utils import get_freer_device
@@ -26,7 +27,7 @@ _valid_filters: dict[str, Filter] = {
     "ExpCell": ExpCellFilter,
     "Strain": StrainFilter,
     "FrechetCell": FrechetCellFilter,
-} # type: ignore
 _valid_optimizers: dict[str, Optimizer] = {
     "MDMin": MDMin,
@@ -39,14 +40,25 @@ _valid_optimizers: dict[str, Optimizer] = {
     "GPMin": GPMin,
     "CellAwareBFGS": CellAwareBFGS,
     "ODE12r": ODE12r,
-} # type: ignore
-# @task(
-#     cache_key_fn=task_input_hash,
-#     cache_expiration=timedelta(days=1),
-#     timeout_seconds=120)
-@task(timeout_seconds=120, result_storage=None)
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
@@ -103,7 +115,6 @@ def run(
             raise ValueError(f"Invalid optimizer: {optimizer}")
         optimizer = _valid_optimizers[optimizer]
     filter_kwargs = filter_kwargs or {}
     optimizer_kwargs = optimizer_kwargs or {}
     criterion = criterion or {}
@@ -126,4 +137,3 @@ def run(
     return {
         "atoms": atoms,
     }

+"""
 Define structure optimization tasks.
 """
 from datetime import timedelta
+from prefect import task
+from prefect.runtime import task_run
+from prefect.tasks import task_input_hash
+from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator
 from ase import Atoms
 from ase.calculators.calculator import Calculator
 from ase.calculators.mixing import SumCalculator
 from ase.filters import Filter
 from ase.optimize import *  # type: ignore
 from ase.optimize.optimize import Optimizer
 from mlip_arena.models import MLIPEnum
 from mlip_arena.models.utils import get_freer_device
     "ExpCell": ExpCellFilter,
     "Strain": StrainFilter,
     "FrechetCell": FrechetCellFilter,
+}  # type: ignore
 _valid_optimizers: dict[str, Optimizer] = {
     "MDMin": MDMin,
     "GPMin": GPMin,
     "CellAwareBFGS": CellAwareBFGS,
     "ODE12r": ODE12r,
+}  # type: ignore
+def _generate_task_run_name():
+    task_name = task_run.task_name
+    parameters = task_run.parameters
+    atoms = parameters["atoms"]
+    calculator_name = parameters["calculator_name"]
+    return f"{task_name}: {atoms.get_chemical_formula()} - {calculator_name}"
+@task(
+    name="MD",
+    task_run_name=_generate_task_run_name,
+    cache_key_fn=task_input_hash,
+    # cache_expiration=timedelta(days=1)
+)
 def run(
     atoms: Atoms,
     calculator_name: str | MLIPEnum,
             raise ValueError(f"Invalid optimizer: {optimizer}")
         optimizer = _valid_optimizers[optimizer]
     filter_kwargs = filter_kwargs or {}
     optimizer_kwargs = optimizer_kwargs or {}
     criterion = criterion or {}
     return {
         "atoms": atoms,
     }

tests/test_eos.py CHANGED Viewed

@@ -2,14 +2,38 @@ import sys
 import pytest
 from ase.build import bulk
 from prefect.testing.utilities import prefect_test_harness
 from mlip_arena.models import MLIPEnum
-from mlip_arena.tasks.eos.run import fit as EOS
-atoms = bulk("Cu", "fcc", a=3.6)
-@pytest.mark.skipif(sys.version_info[:2] != (3,11), reason="avoid prefect race condition on concurrent tasks")
 @pytest.mark.parametrize("model", [MLIPEnum["MACE-MP(M)"]])
 def test_eos(model: MLIPEnum):
     """
@@ -17,21 +41,8 @@ def test_eos(model: MLIPEnum):
     """
     with prefect_test_harness():
-        result = EOS(
-            atoms=atoms,
             calculator_name=model.name,
-            calculator_kwargs={},
-            device=None,
-            optimizer="BFGSLineSearch",
-            optimizer_kwargs=None,
-            filter="FrechetCell",
-            filter_kwargs=None,
-            criterion=dict(
-                fmax=0.1,
-            ),
-            max_abs_strain=0.1,
-            npoints=6,
         )
         assert isinstance(result["K"], float)

 import pytest
 from ase.build import bulk
+from prefect import flow
 from prefect.testing.utilities import prefect_test_harness
 from mlip_arena.models import MLIPEnum
+from mlip_arena.tasks.eos import run as EOS
+@flow
+def single_eos_flow(calculator_name):
+    atoms = bulk("Cu", "fcc", a=3.6)
+    return EOS(
+        atoms=atoms,
+        calculator_name=calculator_name,
+        calculator_kwargs={},
+        device=None,
+        optimizer="BFGSLineSearch",
+        optimizer_kwargs=None,
+        filter="FrechetCell",
+        filter_kwargs=None,
+        criterion=dict(
+            fmax=0.1,
+        ),
+        max_abs_strain=0.1,
+        npoints=6,
+    )
+@pytest.mark.skipif(
+    sys.version_info[:2] != (3, 11),
+    reason="avoid prefect race condition on concurrent tasks",
+)
 @pytest.mark.parametrize("model", [MLIPEnum["MACE-MP(M)"]])
 def test_eos(model: MLIPEnum):
     """
     """
     with prefect_test_harness():
+        result = single_eos_flow(
             calculator_name=model.name,
         )
         assert isinstance(result["K"], float)