| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import sys, csv, re |
| from pathlib import Path |
|
|
| |
# Locations of the per-property output tables and of the PID -> SMILES
# lookup table. Both are relative paths, so they are resolved against the
# current working directory when the script runs.
OUT_DIR = Path("../../RESULTS")
SMILES_CSV = Path("../../..").joinpath("SMILES.csv")

# Header names shared by every per-property CSV.
PID_COL, SMILES_COL = "PID", "SMILES"

# Property column name -> suffix of the "<PID>_<suffix>" result file
# the value is read from.
PROP_FILES_BASE = {
    "K(GPa)": "K_result.dat",
    "G1(GPa)": "G1_result.dat",
    "G2(GPa)": "G2_result.dat",
    "G(GPa)": "G_result.dat",
    "nu": "nu_result.dat",
    "E(GPa)": "E_result.dat",
}

# Property column name -> CSV file (inside OUT_DIR) that collects that
# property for all PIDs.
PROP_OUTFILE = {
    "K(GPa)": "K_MD.csv",
    "G1(GPa)": "G1_MD.csv",
    "G2(GPa)": "G2_MD.csv",
    "G(GPa)": "G_MD.csv",
    "nu": "NU_MD.csv",
    "E(GPa)": "E_MD.csv",
}

# Optionally signed decimal number: "12", "12.", "12.5" or ".5", with an
# optional exponent such as "e-3".
FLOAT_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")
|
|
def usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    synopsis = "Usage: python update_modulus_csv.py PID"
    print(synopsis)
    # Same effect as sys.exit(1): raises SystemExit carrying the status code.
    raise SystemExit(1)
|
|
def first_number(text: str):
    """Return the first numeric literal found in *text*, or None.

    The match is returned as the exact substring matched by ``FLOAT_RE``
    (a ``str``, not a ``float``), so the original formatting is preserved.
    """
    match = FLOAT_RE.search(text)
    if match is None:
        return None
    return match.group(0)
|
|
def load_smiles(pid: str) -> str:
    """Look up the SMILES string recorded for *pid* in ``SMILES_CSV``.

    The PID and SMILES columns are located by header name, ignoring case
    and any whitespace surrounding the header text. Cell values are
    stripped before comparison and before being returned.

    Args:
        pid: Identifier to search for; compared against the stripped PID cell.

    Returns:
        The stripped SMILES cell of the first matching row, or ``""`` when
        the file does not exist, has no header, lacks either column, or
        contains no row for *pid*.
    """
    if not SMILES_CSV.exists():
        return ""
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark; with plain "utf-8" the BOM would stay glued to the first header
    # name and the "pid" column would never be found.
    with SMILES_CSV.open(newline="", encoding="utf-8-sig") as fh:
        rdr = csv.DictReader(fh)
        if not rdr.fieldnames:
            return ""
        # Normalised header -> header exactly as it appears in the file
        # (the latter is the key DictReader uses for each row).
        lower = {name.strip().lower(): name for name in rdr.fieldnames}
        pid_k, smi_k = lower.get("pid"), lower.get("smiles")
        if not pid_k or not smi_k:
            return ""
        for row in rdr:
            # Short rows yield None for missing cells, hence the `or ""`.
            if (row.get(pid_k) or "").strip() == pid:
                return (row.get(smi_k) or "").strip()
    return ""
|
|
def read_existing_prop(csv_path: Path, prop_col: str):
    """Read an existing per-property CSV into a dict keyed by PID.

    Args:
        csv_path: Location of the per-property table; it may not exist yet.
        prop_col: Header name of the property column to carry over.

    Returns:
        A dict mapping each non-empty, stripped PID to a row dict holding
        exactly the ``PID_COL``, ``SMILES_COL`` and *prop_col* keys with
        stripped string values. Rows without a PID are dropped, and a later
        row replaces an earlier one with the same PID. The dict is empty
        when *csv_path* does not exist.
    """
    rows = {}
    if csv_path.exists():
        # "utf-8-sig" reads plain UTF-8 unchanged but strips a byte-order mark
        # (for example one added by a spreadsheet editor). Without this the
        # first header would not equal PID_COL, every row would be skipped,
        # and the caller's rewrite would discard the whole table.
        with csv_path.open(newline="", encoding="utf-8-sig") as fh:
            rdr = csv.DictReader(fh)
            for row in rdr:
                # Missing cells come back as None, hence the `or ""`.
                k = (row.get(PID_COL) or "").strip()
                if not k:
                    continue
                rows[k] = {
                    PID_COL: k,
                    SMILES_COL: (row.get(SMILES_COL) or "").strip(),
                    prop_col: (row.get(prop_col) or "").strip(),
                }
    return rows
|
|
def write_prop(csv_path: Path, rows: dict, prop_col: str):
    """Write *rows* to *csv_path* as a ``PID, SMILES, <prop_col>`` table.

    The parent directory is created when missing. Rows are emitted in
    ascending order of their dict key (plain string ordering).

    Args:
        csv_path: Destination file; replaced as a whole.
        rows: Mapping of PID -> row dict. Each row dict is passed to
            ``csv.DictWriter`` with the three field names above.
        prop_col: Header name of the property column.
    """
    csv_path.parent.mkdir(parents=True, exist_ok=True)
    # Write the full table to a sibling temp file and rename it over the
    # target afterwards, so an interrupted run can never leave the
    # accumulated results truncated or half-written.
    tmp_path = csv_path.with_name(csv_path.name + ".tmp")
    try:
        with tmp_path.open("w", newline="", encoding="utf-8") as fh:
            w = csv.DictWriter(fh, fieldnames=[PID_COL, SMILES_COL, prop_col])
            w.writeheader()
            for pid_key in sorted(rows):
                w.writerow(rows[pid_key])
        tmp_path.replace(csv_path)
    finally:
        # After a successful replace the temp file is gone; it only remains
        # (and is cleaned up here) when writing or renaming failed.
        if tmp_path.exists():
            tmp_path.unlink()
|
|
def main():
    """Merge one PID's modulus results into the per-property CSV tables.

    Expects exactly one command-line argument, the PID. For every entry of
    ``PROP_FILES_BASE`` the file ``<PID>_<suffix>`` in the current directory
    is read and its first number extracted. Each value found is then
    inserted or updated in the matching ``PROP_OUTFILE`` table under
    ``OUT_DIR``. Missing, unreadable or number-free files are reported and
    skipped.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        usage()
    pid = cli_args[0].strip()
    if not pid:
        usage()

    # Phase 1: collect every property value available for this PID.
    found = {}
    for column, suffix in PROP_FILES_BASE.items():
        fname = f"{pid}_{suffix}"
        result_file = Path(fname)
        if not result_file.exists():
            print(f"[SKIP] {fname}: not found")
            continue
        try:
            # Undecodable bytes are dropped rather than treated as an error.
            contents = result_file.read_text(encoding="utf-8", errors="ignore")
        except Exception:
            print(f"[SKIP] {fname}: cannot read")
            continue
        number = first_number(contents)
        if number is None:
            print(f"[SKIP] {fname}: no numeric value found")
            continue
        found[column] = number
        print(f"[OK] {fname}: {column}={number}")

    if not found:
        print("[INFO] No modulus values found; nothing to write.")
        return

    smiles = load_smiles(pid)

    # Phase 2: insert or update this PID's row in each affected table.
    for column, number in found.items():
        out_csv = OUT_DIR / PROP_OUTFILE[column]
        table = read_existing_prop(out_csv, column)

        # Keep the SMILES already stored for this PID when the lookup
        # above returned nothing.
        stored_smiles = table.get(pid, {}).get(SMILES_COL, "")
        table[pid] = {
            PID_COL: pid,
            SMILES_COL: smiles or stored_smiles,
            column: number,
        }

        write_prop(out_csv, table, column)
        print(f"[DONE] Updated {out_csv.name} for PID={pid}")


if __name__ == "__main__":
    main()
|
|