"""
Docking Utilities for AmberPrep

This module contains all the Python functions needed for the docking workflow:
1. Compute ligand center
2. Prepare receptor (tleap + pdb4amber + meeko)
3. Prepare ligand (obabel + meeko)
4. Run Vina docking
5. Split docked poses (vina_split)
6. Convert poses to PDB (obabel)
7. Sanitize docked poses for use in MD workflow

Usage:
    from docking_utils import (
        compute_ligand_center,
        prepare_receptor,
        prepare_ligand,
        run_vina_docking,
        split_docked_poses,
        convert_pdbqt_to_pdb,
        sanitize_docked_pose
    )
"""
| |
|
import subprocess
from pathlib import Path
import logging

# Module-level logger named after this module. No handlers are installed here;
# the script entry point at the bottom of the file calls logging.basicConfig.
logger = logging.getLogger(__name__)
| |
|
| |
|
def compute_ligand_center(pdb_path: str) -> tuple:
    """
    Compute the geometric center of all atoms in a ligand PDB file.

    The center is the unweighted mean of the atom positions loaded by
    MDAnalysis.

    Args:
        pdb_path: Path to the ligand PDB file

    Returns:
        Tuple of (x, y, z) center coordinates as Python floats

    Raises:
        FileNotFoundError: If pdb_path does not exist
        RuntimeError: If MDAnalysis cannot be imported
        ValueError: If the file contains no atoms
    """
    pdb_path = Path(pdb_path)
    # Check the cheap precondition first so a missing file is reported as such
    # even when the optional MDAnalysis dependency is not installed.
    if not pdb_path.exists():
        raise FileNotFoundError(f"Ligand file not found: {pdb_path}")

    # Imported lazily so the rest of the module works without MDAnalysis.
    try:
        import MDAnalysis as mda
    except ImportError as e:
        raise RuntimeError(
            "MDAnalysis and NumPy are required. Install with: "
            "conda install -c conda-forge mdanalysis numpy"
        ) from e

    u = mda.Universe(str(pdb_path))
    if u.atoms.n_atoms == 0:
        raise ValueError(f"No atoms found in ligand file {pdb_path}")

    coords = u.atoms.positions.astype(float)
    center = coords.mean(axis=0)

    logger.info(f"Ligand center for {pdb_path.name}: ({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})")
    return float(center[0]), float(center[1]), float(center[2])
| |
|
| |
|
def prepare_receptor(protein_pdb: str, output_dir: str) -> tuple:
    """
    Prepare receptor for docking:
    1. Run tleap to add hydrogens
    2. Run pdb4amber to fix element names
    3. Run mk_prepare_receptor.py to create PDBQT

    Each step is skipped when its output file already exists in output_dir.

    Args:
        protein_pdb: Path to protein PDB file (typically 1_protein_no_hydrogens.pdb)
        output_dir: Directory to store output files (created if missing)

    Returns:
        Tuple of (receptor_fixed_pdb_path, receptor_pdbqt_path)

    Raises:
        FileNotFoundError: If protein_pdb does not exist
        RuntimeError: If a tool exits non-zero or does not produce its output
    """
    # Resolved to an absolute path because tleap runs with cwd=output_dir and
    # reads this path from the generated input script.
    protein_pdb = Path(protein_pdb).resolve()
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    if not protein_pdb.exists():
        raise FileNotFoundError(f"Protein PDB not found: {protein_pdb}")

    # Step 1: tleap writes receptor.pdb relative to its working directory.
    tleap_in = output_dir / "prepare_receptor.in"
    receptor_pdb = output_dir / "receptor.pdb"

    if not receptor_pdb.exists():
        logger.info("Step 1: Running tleap to add hydrogens to protein...")
        with open(tleap_in, "w") as f:
            f.write("source leaprc.protein.ff14SB\n")
            f.write(f"protein = loadpdb {protein_pdb}\n")
            f.write("savepdb protein receptor.pdb\n")
            f.write("quit\n")

        result = subprocess.run(
            ["tleap", "-f", tleap_in.name],
            cwd=output_dir,
            capture_output=True,
            text=True,
        )
        # The output file is checked as well as the exit status.
        if result.returncode != 0 or not receptor_pdb.exists():
            raise RuntimeError(
                f"tleap failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )
        logger.info(f" Created: {receptor_pdb}")

    # Step 2: pdb4amber runs in the caller's working directory, so the paths
    # (which include output_dir) are passed as-is.
    receptor_fixed = output_dir / "receptor_fixed.pdb"

    if not receptor_fixed.exists():
        logger.info("Step 2: Running pdb4amber to add element names...")
        result = subprocess.run(
            ["pdb4amber", "-i", str(receptor_pdb), "-o", str(receptor_fixed)],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0 or not receptor_fixed.exists():
            raise RuntimeError(
                f"pdb4amber failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )
        logger.info(f" Created: {receptor_fixed}")

    # Step 3: mk_prepare_receptor.py runs with cwd=output_dir, so the input is
    # given by file name only. Passing str(receptor_fixed) would repeat a
    # relative output_dir inside the child process (out/out/receptor_fixed.pdb).
    receptor_pdbqt = output_dir / "receptor.pdbqt"

    if not receptor_pdbqt.exists():
        logger.info("Step 3: Running mk_prepare_receptor.py to create PDBQT...")
        result = subprocess.run(
            ["mk_prepare_receptor.py", "-i", receptor_fixed.name, "-o", "receptor", "-p"],
            cwd=output_dir,
            capture_output=True,
            text=True,
        )
        if result.returncode != 0 or not receptor_pdbqt.exists():
            raise RuntimeError(
                f"mk_prepare_receptor.py failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
            )
        logger.info(f" Created: {receptor_pdbqt}")

    return str(receptor_fixed), str(receptor_pdbqt)
| |
|
| |
|
def prepare_ligand(ligand_pdb: str, output_dir: str, ligand_index: int = 1) -> str:
    """
    Convert a ligand PDB into a docking-ready PDBQT file.

    Two external tools are run in sequence: obabel (PDB -> SDF), then
    mk_prepare_ligand.py (SDF -> PDBQT). Both outputs are written to
    output_dir as ligand_<ligand_index>.sdf / .pdbqt.

    Args:
        ligand_pdb: Path to ligand PDB file
        output_dir: Directory to store output files (created if missing)
        ligand_index: Index number for naming output files

    Returns:
        Path to ligand PDBQT file

    Raises:
        FileNotFoundError: If ligand_pdb does not exist
        RuntimeError: If a tool exits non-zero or does not produce its output
    """
    source = Path(ligand_pdb)
    workdir = Path(output_dir)
    workdir.mkdir(parents=True, exist_ok=True)

    if not source.exists():
        raise FileNotFoundError(f"Ligand PDB not found: {source}")

    sdf_path = workdir / f"ligand_{ligand_index}.sdf"
    pdbqt_path = workdir / f"ligand_{ligand_index}.pdbqt"

    # (log banner, tool name used in the error message, argv, expected product)
    pipeline = (
        (
            f"Step 1: Converting ligand {ligand_index} PDB to SDF...",
            "obabel",
            ["obabel", "-i", "pdb", str(source), "-o", "sdf", "-O", str(sdf_path)],
            sdf_path,
        ),
        (
            f"Step 2: Converting ligand {ligand_index} SDF to PDBQT...",
            "mk_prepare_ligand.py",
            ["mk_prepare_ligand.py", "-i", str(sdf_path), "-o", str(pdbqt_path)],
            pdbqt_path,
        ),
    )

    for banner, tool, argv, product in pipeline:
        logger.info(banner)
        proc = subprocess.run(argv, capture_output=True, text=True)
        # A step succeeds only if the tool exits 0 and its output file exists.
        if not (proc.returncode == 0 and product.exists()):
            raise RuntimeError(
                f"{tool} failed:\nSTDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}"
            )
        logger.info(f" Created: {product}")

    return str(pdbqt_path)
| |
|
| |
|
def run_vina_docking(
    receptor_pdbqt: str,
    ligand_pdbqt: str,
    center_x: float,
    center_y: float,
    center_z: float,
    size_x: float = 18.0,
    size_y: float = 18.0,
    size_z: float = 18.0,
    output_dir: str = None,
    ligand_index: int = 1,
    exhaustiveness: int = 8,
    num_modes: int = 9,
) -> tuple:
    """
    Run AutoDock Vina docking.

    Args:
        receptor_pdbqt: Path to receptor PDBQT file
        ligand_pdbqt: Path to ligand PDBQT file
        center_x, center_y, center_z: Box center coordinates (Angstroms)
        size_x, size_y, size_z: Box dimensions (Angstroms)
        output_dir: Directory for output files (default: same as ligand);
            created if missing
        ligand_index: Index for naming output files
        exhaustiveness: Search exhaustiveness (default: 8)
        num_modes: Maximum number of binding modes (default: 9)

    Returns:
        Tuple of (docked_pdbqt_path, log_file_path)

    Raises:
        FileNotFoundError: If the receptor or ligand PDBQT does not exist
        RuntimeError: If vina exits non-zero or writes no docked PDBQT
    """
    receptor_pdbqt = Path(receptor_pdbqt)
    ligand_pdbqt = Path(ligand_pdbqt)

    # Validate inputs up front, as the other helpers in this module do.
    if not receptor_pdbqt.exists():
        raise FileNotFoundError(f"Receptor PDBQT not found: {receptor_pdbqt}")
    if not ligand_pdbqt.exists():
        raise FileNotFoundError(f"Ligand PDBQT not found: {ligand_pdbqt}")

    output_dir = Path(output_dir) if output_dir else ligand_pdbqt.parent
    output_dir.mkdir(parents=True, exist_ok=True)

    docked_pdbqt = output_dir / f"ligand_{ligand_index}_docked.pdbqt"
    log_file = output_dir / f"ligand_{ligand_index}_docked.log"

    logger.info(f"Running Vina docking for ligand {ligand_index}...")
    logger.info(f" Center: ({center_x:.3f}, {center_y:.3f}, {center_z:.3f})")
    logger.info(f" Size: ({size_x:.1f}, {size_y:.1f}, {size_z:.1f})")

    # NOTE(review): "--log" is deliberately not passed. Vina 1.2.x reportedly
    # no longer accepts it -- confirm against the installed version. The
    # captured stdout is written to the log file below instead.
    cmd = [
        "vina",
        "--receptor", str(receptor_pdbqt),
        "--ligand", str(ligand_pdbqt),
        "--center_x", str(center_x),
        "--center_y", str(center_y),
        "--center_z", str(center_z),
        "--size_x", str(size_x),
        "--size_y", str(size_y),
        "--size_z", str(size_z),
        "--out", str(docked_pdbqt),
        "--exhaustiveness", str(exhaustiveness),
        "--num_modes", str(num_modes),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0 or not docked_pdbqt.exists():
        raise RuntimeError(
            f"Vina docking failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        )

    # Persist Vina's console report as the log file.
    # presumably stdout carries the same results table as the old --log file;
    # verify against the installed Vina.
    log_file.write_text(result.stdout)

    logger.info(f" Created: {docked_pdbqt}")
    logger.info(f" Log: {log_file}")

    return str(docked_pdbqt), str(log_file)
| |
|
| |
|
def parse_vina_log(log_path: str) -> list:
    """
    Extract the per-mode results table from a Vina log file.

    Parsing starts after the table rule line and stops at the first blank
    line that follows it. Rows that do not start with a digit, have fewer
    than four fields, or contain non-numeric values are skipped.

    Args:
        log_path: Path to Vina log file

    Returns:
        List of dicts with 'mode', 'affinity', 'rmsd_lb', 'rmsd_ub' for each
        pose; an empty list if the file does not exist or has no table
    """
    source = Path(log_path)
    if not source.exists():
        return []

    table_rule = "-----+------------+----------+----------"
    rows = []
    past_rule = False

    with open(source, "r") as handle:
        for raw in handle:
            text = raw.strip()

            if table_rule in text:
                past_rule = True
                continue
            if not past_rule:
                continue

            # First blank line after the table ends the results section.
            if not text:
                break
            if not text[0].isdigit():
                continue

            fields = text.split()
            if len(fields) < 4:
                continue

            try:
                entry = {
                    'mode': int(fields[0]),
                    'affinity': float(fields[1]),
                    'rmsd_lb': float(fields[2]),
                    'rmsd_ub': float(fields[3]),
                }
            except (ValueError, IndexError):
                continue
            rows.append(entry)

    return rows
| |
|
| |
|
def split_docked_poses(docked_pdbqt: str, output_prefix: str = None) -> list:
    """
    Split docked PDBQT into individual pose files using vina_split.

    vina_split is run inside the directory that contains docked_pdbqt, and
    the pose files are then collected from that directory by prefix.

    Args:
        docked_pdbqt: Path to docked PDBQT file with multiple poses
        output_prefix: Prefix for output files (default: the input stem with
            "_docked" replaced by "_mode")

    Returns:
        Sorted list of paths to individual pose PDBQT files

    Raises:
        FileNotFoundError: If docked_pdbqt does not exist
        RuntimeError: If vina_split exits non-zero
    """
    docked_pdbqt = Path(docked_pdbqt)
    if not docked_pdbqt.exists():
        raise FileNotFoundError(f"Docked PDBQT not found: {docked_pdbqt}")

    output_dir = docked_pdbqt.parent
    if output_prefix is None:
        output_prefix = docked_pdbqt.stem.replace("_docked", "_mode")

    logger.info(f"Splitting docked poses from {docked_pdbqt.name}...")

    # The child runs with cwd=output_dir, so the input is given by file name
    # only. Passing the full relative path would repeat the directory inside
    # the child process.
    result = subprocess.run(
        ["vina_split", "--input", docked_pdbqt.name, "--ligand", output_prefix],
        cwd=output_dir,
        capture_output=True,
        text=True,
    )

    if result.returncode != 0:
        raise RuntimeError(
            f"vina_split failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
        )

    # Exclude the multi-pose input itself: when the stem has no "_docked"
    # part (or a custom prefix matches it) the glob would otherwise return it
    # as if it were a pose.
    pose_files = sorted(
        candidate
        for candidate in output_dir.glob(f"{output_prefix}*.pdbqt")
        if candidate.name != docked_pdbqt.name
    )
    logger.info(f" Split into {len(pose_files)} pose files")

    return [str(f) for f in pose_files]
| |
|
| |
|
def convert_pdbqt_to_pdb(pdbqt_path: str, ph: float = 7.4) -> str:
    """
    Convert a PDBQT file to PDB with obabel.

    The PDB is written next to the input, with the suffix replaced by ".pdb".

    Args:
        pdbqt_path: Path to PDBQT file
        ph: Value passed to obabel's "-p" option (default: 7.4)

    Returns:
        Path to output PDB file

    Raises:
        FileNotFoundError: If pdbqt_path does not exist
        RuntimeError: If obabel exits non-zero or writes no output file
    """
    source = Path(pdbqt_path)
    if not source.exists():
        raise FileNotFoundError(f"PDBQT file not found: {source}")

    target = source.with_suffix(".pdb")
    command = ["obabel", str(source), "-O", str(target), "-p", str(ph)]

    logger.info(f"Converting {source.name} to PDB...")
    proc = subprocess.run(command, capture_output=True, text=True)

    # Success requires both a zero exit status and the output file on disk.
    succeeded = proc.returncode == 0 and target.exists()
    if not succeeded:
        raise RuntimeError(
            f"obabel conversion failed:\nSTDOUT:\n{proc.stdout}\nSTDERR:\n{proc.stderr}"
        )

    logger.info(f" Created: {target}")
    return str(target)
| |
|
| |
|
def _read_residue_identity(ligand_path) -> tuple:
    """Return (resname, chain, resnum) from the first ATOM/HETATM record, with defaults."""
    resname, chain, resnum = "LIG", "X", 1
    with open(ligand_path, "r") as f:
        for line in f:
            if line.startswith(("ATOM", "HETATM")):
                resname = line[17:20].strip() or "LIG"
                chain = line[21] if len(line) > 21 and line[21].strip() else "X"
                try:
                    resnum = int(line[22:26].strip())
                except ValueError:
                    resnum = 1
                break
    return resname, chain, resnum


def _guess_element(record: str) -> str:
    """Return the element from columns 77-78, else derive it from the atom name, else "C"."""
    element = record[76:78].strip()
    if not element:
        # Fallback heuristic: first two letters of the atom name, capitalized
        # (e.g. "CL1" -> "Cl", "c1" -> "C").
        letters = ''.join(c for c in record[12:16] if c.isalpha())[:2]
        element = letters.capitalize()
    return element or "C"


def sanitize_docked_pose(original_ligand: str, pose_pdb: str) -> str:
    """
    Sanitize a docked pose PDB to match the original ligand format:
    - Restore residue name, chain ID, and residue number from original
    - Convert ATOM to HETATM
    - Rename atoms as element + running per-element index (C1, N1, C2, ...)
    - Drop every record other than ATOM/HETATM and END (CONECT, MASTER, ...)

    Each output record uses the fixed PDB column layout: serial in 7-11, atom
    name in 13-16, resname in 18-20, chain in 22, resnum in 23-26,
    coordinates in 31-54, occupancy/temp factor in 55-66, element in 77-78.

    Args:
        original_ligand: Path to original ligand PDB file
        pose_pdb: Path to docked pose PDB file

    Returns:
        Path to sanitized pose PDB (same as pose_pdb, modified in place)

    Raises:
        FileNotFoundError: If either input file does not exist
    """
    original_ligand = Path(original_ligand)
    pose_pdb = Path(pose_pdb)

    if not original_ligand.exists():
        raise FileNotFoundError(f"Original ligand not found: {original_ligand}")
    if not pose_pdb.exists():
        raise FileNotFoundError(f"Pose PDB not found: {pose_pdb}")

    resname, chain, resnum = _read_residue_identity(original_ligand)

    logger.info(f"Sanitizing pose with resname={resname}, chain={chain}, resnum={resnum}")

    new_lines = []
    atom_counter = 0
    element_counts = {}

    with open(pose_pdb, "r") as f:
        for line in f:
            if line.startswith(("ATOM", "HETATM")):
                # Strip the line ending so short records cannot leak a newline
                # into the fixed-width slices below.
                record = line.rstrip("\r\n")
                atom_counter += 1

                element = _guess_element(record)
                element_counts[element] = element_counts.get(element, 0) + 1
                atom_name = f"{element}{element_counts[element]}"

                # Coordinates are copied verbatim and padded to 24 characters.
                coords = record[30:54].ljust(24)
                occ_temp = record[54:66]
                if occ_temp.strip():
                    occ_temp = occ_temp.ljust(12)
                else:
                    occ_temp = "  1.00  0.00"

                new_lines.append(
                    f"HETATM{atom_counter:5d} {atom_name:<4s} {resname:>3s} "
                    f"{chain}{resnum:4d}    "
                    f"{coords}{occ_temp}          {element:>2s}\n"
                )
            elif line.startswith("END"):
                new_lines.append("END\n")

    with open(pose_pdb, "w") as f:
        f.writelines(new_lines)

    logger.info(f" Sanitized: {pose_pdb}")
    return str(pose_pdb)
| |
|
| |
|
def run_full_docking_workflow(
    protein_pdb: str,
    ligand_pdbs: list,
    output_dir: str,
    box_configs: dict = None,
) -> dict:
    """
    Run the complete docking workflow for multiple ligands.

    The receptor is prepared once in output_dir. Each ligand is then processed
    in its own sub-directory (ligand_<index>, 1-based): prepared, docked,
    split into poses, converted to PDB and sanitized. A failure for one
    ligand is recorded and does not stop the remaining ligands.

    Args:
        protein_pdb: Path to protein PDB file (1_protein_no_hydrogens.pdb)
        ligand_pdbs: List of paths to ligand PDB files
        output_dir: Base output directory for docking results
        box_configs: Optional dict of {ligand_index: {'center': (x,y,z), 'size': (sx,sy,sz)}}.
            A missing 'center' falls back to the ligand's geometric center;
            a missing 'size' falls back to (18.0, 18.0, 18.0).

    Returns:
        Dict with keys 'success', 'ligands', 'warnings' and 'errors';
        each entry of 'ligands' holds the poses and energies of one ligand
    """
    # Use an absolute directory: prepare_receptor and split_docked_poses
    # launch child processes with cwd set to the output directory, where a
    # relative path would be resolved a second time.
    output_dir = Path(output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    box_configs = box_configs or {}

    results = {
        'success': True,
        'ligands': [],
        'warnings': [],
        'errors': [],
    }

    logger.info("=" * 60)
    logger.info("STEP 1: Preparing receptor for docking")
    logger.info("=" * 60)

    # No ligand can be docked without a receptor, so this failure ends the run.
    try:
        _receptor_fixed, receptor_pdbqt = prepare_receptor(protein_pdb, str(output_dir))
    except Exception as e:
        results['success'] = False
        results['errors'].append(f"Receptor preparation failed: {str(e)}")
        return results

    for idx, ligand_pdb in enumerate(ligand_pdbs, start=1):
        ligand_pdb = Path(ligand_pdb)
        logger.info("")
        logger.info("=" * 60)
        logger.info(f"STEP 2.{idx}: Processing ligand {idx}: {ligand_pdb.name}")
        logger.info("=" * 60)

        lig_dir = output_dir / f"ligand_{idx}"
        lig_dir.mkdir(parents=True, exist_ok=True)

        ligand_result = {
            'index': idx,
            'original_file': str(ligand_pdb),
            'poses': [],
            'energies': [],
            'success': True,
        }

        try:
            # Keep a copy of the input ligand next to its results; it is the
            # reference for residue naming when the poses are sanitized.
            original_copy = lig_dir / "original_ligand.pdb"
            if not original_copy.exists():
                original_copy.write_text(ligand_pdb.read_text())

            ligand_pdbqt = prepare_ligand(str(ligand_pdb), str(lig_dir), idx)

            # Resolve the search box for this ligand.
            cfg = box_configs.get(idx, {})
            center = cfg.get('center')
            size = cfg.get('size', (18.0, 18.0, 18.0))

            if center is None:
                cx, cy, cz = compute_ligand_center(str(ligand_pdb))
            else:
                cx, cy, cz = center

            sx, sy, sz = size

            docked_pdbqt, log_file = run_vina_docking(
                receptor_pdbqt, ligand_pdbqt,
                cx, cy, cz, sx, sy, sz,
                str(lig_dir), idx
            )

            energies = parse_vina_log(log_file)
            ligand_result['energies'] = energies

            pose_pdbqts = split_docked_poses(docked_pdbqt)

            for pose_pdbqt in pose_pdbqts:
                pose_pdb = convert_pdbqt_to_pdb(pose_pdbqt)
                sanitize_docked_pose(str(original_copy), pose_pdb)
                ligand_result['poses'].append(pose_pdb)

        except Exception as e:
            # Boundary handler: record the failure and continue with the next ligand.
            ligand_result['success'] = False
            ligand_result['error'] = str(e)
            results['errors'].append(f"Ligand {idx}: {str(e)}")
            logger.error(f"Error processing ligand {idx}: {e}")

        results['ligands'].append(ligand_result)

    # The run succeeds only if every ligand succeeded.
    results['success'] = all(lig['success'] for lig in results['ligands'])

    logger.info("")
    logger.info("=" * 60)
    logger.info("DOCKING WORKFLOW COMPLETE")
    logger.info("=" * 60)

    return results
| |
|
| |
|
| | |
# Command-line entry point: dock one or more ligands against a protein and
# print a short per-ligand summary.
if __name__ == "__main__":
    import argparse

    logging.basicConfig(level=logging.INFO, format='%(message)s')

    parser = argparse.ArgumentParser(description="Run AutoDock Vina docking workflow")
    parser.add_argument("--protein", required=True, help="Path to protein PDB file")
    parser.add_argument("--ligands", nargs="+", required=True, help="Paths to ligand PDB files")
    parser.add_argument("--output", required=True, help="Output directory")
    parser.add_argument("--center", nargs=3, type=float, help="Box center (x y z)")
    parser.add_argument("--size", nargs=3, type=float, default=[18.0, 18.0, 18.0], help="Box size (x y z)")

    args = parser.parse_args()

    # Always forward the box size. It used to be dropped unless --center was
    # also given. Without --center the 'center' key is omitted, and the
    # workflow then computes the center from each ligand.
    box_configs = {}
    for i in range(1, len(args.ligands) + 1):
        cfg = {'size': tuple(args.size)}
        if args.center is not None:
            cfg['center'] = tuple(args.center)
        box_configs[i] = cfg

    results = run_full_docking_workflow(
        args.protein,
        args.ligands,
        args.output,
        box_configs
    )

    print("\n" + "=" * 60)
    print("RESULTS SUMMARY")
    print("=" * 60)
    print(f"Overall success: {results['success']}")
    for lig in results['ligands']:
        print(f"\nLigand {lig['index']}:")
        print(f" Success: {lig['success']}")
        if lig['success']:
            print(f" Poses generated: {len(lig['poses'])}")
            if lig['energies']:
                print(f" Best binding energy: {lig['energies'][0]['affinity']} kcal/mol")
        else:
            print(f" Error: {lig.get('error', 'Unknown')}")
| |
|