| from utils import * |
| from geometry import * |
|
|
| import os |
| import pandas as pd |
| import subprocess |
| import torch |
| import numpy as np |
| import shutil |
| from tqdm import tqdm |
|
|
| from joblib import delayed, Parallel |
|
|
| from Bio.PDB import PDBParser, PDBIO, Select |
|
|
|
|
# Root of the local ProteinMPNN checkout.  The default is the location this
# script was originally written against; set the PROTEINMPNN_HOME environment
# variable to use a different checkout without editing this file.
_PROTEINMPNN_HOME = (
    os.environ.get("PROTEINMPNN_HOME")
    or "/datapool/data2/home/jiahan/Tool/ProteinMPNN"
)
# Directory containing the helper scripts invoked below
# (parse_multiple_chains.py, assign_fixed_chains.py,
# make_fixed_positions_dict.py).
HELPERS = os.path.join(_PROTEINMPNN_HOME, "helper_scripts")
# Script that is launched to sample sequences.
RUNNER = os.path.join(_PROTEINMPNN_HOME, "protein_mpnn_run.py")
|
|
def get_chain_nums(pdb_path, chain_id):
    """Return the residue sequence numbers of one chain in a PDB file.

    Args:
        pdb_path: Path of the PDB file to parse.
        chain_id: Identifier of the chain to look up in the first model.

    Returns:
        A list with the second field of each residue id (``get_id()[1]``),
        in the order the residues are iterated.

    Raises:
        KeyError: If the first model has no chain ``chain_id``.
    """
    structure = PDBParser().get_structure('X', pdb_path)
    first_model = structure[0]
    numbers = []
    for res in first_model[chain_id]:
        # A Biopython residue id is a tuple; index 1 is the residue number.
        numbers.append(res.get_id()[1])
    return numbers
|
|
def process_mpnn_bb(name='1aze_B', chains_to_design="A", num_samples=1,
                    input_dir='./Data/Models_new/Codesign/bb/pdbs',
                    output_dir='./Data/Models_new/Codesign/bb/seqs',
                    device='cuda:1'):
    """Run the ProteinMPNN helper scripts and sampler for one backbone entry.

    The PDB files under ``<input_dir>/<name>`` are handed to the helper
    scripts in ``HELPERS`` to build three JSONL files (parsed chains, chain
    assignment, fixed positions) inside ``<output_dir>/<name>``, after which
    ``RUNNER`` is launched with those files and the same output folder.

    The position list passed to ``make_fixed_positions_dict.py`` consists of
    the residue numbers of ``chains_to_design`` read from
    ``<input_dir>/<name>/gt.pdb`` and is sent together with
    ``--specify_non_fixed`` (presumably marking them as the designable
    positions -- confirm against the ProteinMPNN helper script).

    Args:
        name: Sub-directory name of the entry under ``input_dir``; also used
            as the output sub-directory name.
        chains_to_design: Chain id forwarded as ``--chain_list`` and used to
            look up residue numbers in ``gt.pdb``.
        num_samples: Value forwarded as ``--num_seq_per_target``.
        input_dir: Directory holding one sub-directory of PDB files per entry.
        output_dir: Directory under which ``<name>/`` is created for outputs.
        device: Value forwarded to ``RUNNER`` as ``--device``.
            NOTE(review): confirm the local ``protein_mpnn_run.py`` accepts
            this flag.

    Returns:
        None.  If any step exits with a non-zero status a ``RuntimeWarning``
        is issued and the remaining steps are skipped, because every step
        consumes the files written by the previous ones.
    """
    import warnings

    dirname = os.path.join(output_dir, name)
    # exist_ok avoids the check-then-create race when entries are processed
    # concurrently.
    os.makedirs(dirname, exist_ok=True)

    path_for_parsed_chains = os.path.join(dirname, 'parsed_pdbs.jsonl')
    path_for_assigned_chains = os.path.join(dirname, 'assigned_pdbs.jsonl')
    path_for_fixed_positions = os.path.join(dirname, 'fixed_pdbs.jsonl')

    residue_nums = get_chain_nums(
        os.path.join(input_dir, name, 'gt.pdb'), chains_to_design
    )
    design_only_positions = " ".join(map(str, residue_nums))

    # The commands are order-dependent: each one reads files produced by the
    # commands before it.
    commands = [
        [
            "python", os.path.join(HELPERS, "parse_multiple_chains.py"),
            "--input_path", os.path.join(input_dir, name),
            "--output_path", path_for_parsed_chains,
        ],
        [
            "python", os.path.join(HELPERS, "assign_fixed_chains.py"),
            "--input_path", path_for_parsed_chains,
            "--output_path", path_for_assigned_chains,
            '--chain_list', chains_to_design,
        ],
        [
            "python", os.path.join(HELPERS, "make_fixed_positions_dict.py"),
            "--input_path", path_for_parsed_chains,
            "--output_path", path_for_fixed_positions,
            '--chain_list', chains_to_design,
            '--position_list', design_only_positions,
            '--specify_non_fixed',
        ],
        [
            "python", RUNNER,
            "--jsonl_path", path_for_parsed_chains,
            "--chain_id_jsonl", path_for_assigned_chains,
            "--fixed_positions_jsonl", path_for_fixed_positions,
            "--out_folder", dirname,
            "--num_seq_per_target", f"{num_samples}",
            "--sampling_temp", "0.1",
            "--seed", "37",
            "--batch_size", "1",
            '--device', device,
        ],
    ]

    for command in commands:
        completed = subprocess.run(command)
        if completed.returncode != 0:
            # Stop here instead of feeding missing or stale JSONL files from
            # an earlier run into the following steps.  A warning (not an
            # exception) keeps batch callers running, as before.
            warnings.warn(
                f"ProteinMPNN step failed for {name!r} "
                f"(exit code {completed.returncode}): {' '.join(command)}; "
                "skipping the remaining steps",
                RuntimeWarning,
                stacklevel=2,
            )
            return
|
|
def process_one_item_mpnn(name='1a1m_C', chains_to_design="A", num_samples=1,
                          input_dir="./Data/Baselines_new/Tests",
                          output_dir="./Data/Baselines_new/Codesign",
                          device='cuda:1'):
    """Run the ProteinMPNN helper scripts and sampler for one baseline entry.

    The PDB files under ``<output_dir>/<name>/rfs`` are handed to the helper
    scripts in ``HELPERS`` to build three JSONL files (parsed chains, chain
    assignment, fixed positions) inside ``<output_dir>/<name>/mpnns``, after
    which ``RUNNER`` is launched with those files and the same output folder.

    The position list passed to ``make_fixed_positions_dict.py`` is
    ``1..L``, where ``L`` is the length of the second line of
    ``<input_dir>/<name>/seq.fasta``; it is sent together with
    ``--specify_non_fixed`` (presumably marking them as the designable
    positions -- confirm against the ProteinMPNN helper script).

    Args:
        name: Sub-directory name of the entry under ``input_dir`` and
            ``output_dir``.
        chains_to_design: Chain id forwarded as ``--chain_list``.
        num_samples: Value forwarded as ``--num_seq_per_target``.
        input_dir: Directory holding ``<name>/seq.fasta``.
        output_dir: Directory holding ``<name>/rfs`` (input backbones) and
            under which ``<name>/mpnns`` is created for outputs.
        device: Value forwarded to ``RUNNER`` as ``--device``.
            NOTE(review): confirm the local ``protein_mpnn_run.py`` accepts
            this flag.

    Returns:
        None.  If any step exits with a non-zero status a ``RuntimeWarning``
        is issued and the remaining steps are skipped, because every step
        consumes the files written by the previous ones.

    Raises:
        IndexError: If ``seq.fasta`` has fewer than two lines.
    """
    import warnings

    dirname = os.path.join(output_dir, name, 'mpnns')
    # exist_ok avoids the check-then-create race when entries are processed
    # concurrently.
    os.makedirs(dirname, exist_ok=True)

    path_for_parsed_chains = os.path.join(dirname, 'parsed_pdbs.jsonl')
    path_for_assigned_chains = os.path.join(dirname, 'assigned_pdbs.jsonl')
    path_for_fixed_positions = os.path.join(dirname, 'fixed_pdbs.jsonl')

    # The sequence is taken from the second line of the FASTA file
    # (assumes a header line followed by an unwrapped sequence line).
    with open(os.path.join(input_dir, name, 'seq.fasta'), 'r') as f:
        pep_len = len(f.readlines()[1].strip())
    design_only_positions = " ".join(map(str, range(1, pep_len + 1)))

    # The commands are order-dependent: each one reads files produced by the
    # commands before it.
    commands = [
        [
            "python", os.path.join(HELPERS, "parse_multiple_chains.py"),
            # Use output_dir rather than re-spelling its literal value.
            "--input_path", os.path.join(output_dir, name, 'rfs'),
            "--output_path", path_for_parsed_chains,
        ],
        [
            "python", os.path.join(HELPERS, "assign_fixed_chains.py"),
            "--input_path", path_for_parsed_chains,
            "--output_path", path_for_assigned_chains,
            '--chain_list', chains_to_design,
        ],
        [
            "python", os.path.join(HELPERS, "make_fixed_positions_dict.py"),
            "--input_path", path_for_parsed_chains,
            "--output_path", path_for_fixed_positions,
            '--chain_list', chains_to_design,
            '--position_list', design_only_positions,
            '--specify_non_fixed',
        ],
        [
            "python", RUNNER,
            "--jsonl_path", path_for_parsed_chains,
            "--chain_id_jsonl", path_for_assigned_chains,
            "--fixed_positions_jsonl", path_for_fixed_positions,
            "--out_folder", dirname,
            "--num_seq_per_target", f"{num_samples}",
            "--sampling_temp", "0.1",
            "--seed", "37",
            "--batch_size", "1",
            '--device', device,
        ],
    ]

    for command in commands:
        completed = subprocess.run(command)
        if completed.returncode != 0:
            # Stop here instead of feeding missing or stale JSONL files from
            # an earlier run into the following steps.  A warning (not an
            # exception) keeps batch callers running, as before.
            warnings.warn(
                f"ProteinMPNN step failed for {name!r} "
                f"(exit code {completed.returncode}): {' '.join(command)}; "
                "skipping the remaining steps",
                RuntimeWarning,
                stacklevel=2,
            )
            return
|
|
|
|
def write_seq_to_pdb(seq_path, pdb_path, out_path, chain_id):
    """Rename the residues of one chain according to a FASTA sequence.

    The last record of the FASTA file at ``seq_path`` is used.  Residues of
    chain ``chain_id`` in the first model of ``pdb_path`` are renamed, in
    iteration order, to the three-letter code of the corresponding sequence
    letter; residues beyond the end of the sequence keep their name.  Only
    ``resname`` is changed -- atoms are left as they are.  The whole
    structure is then written to ``out_path``.

    Args:
        seq_path: FASTA file; its last record supplies the sequence.
        pdb_path: PDB file providing the structure.
        out_path: Destination path of the rewritten PDB file.
        chain_id: Id of the chain whose residues are renamed.  If the first
            model has no such chain the structure is written unchanged.

    Raises:
        IndexError: If the FASTA file contains no records.
        KeyError: If the sequence contains a letter outside the 20 standard
            amino acids and ``X``.
    """
    # Imported locally so this function does not rely on SeqIO leaking in
    # through one of the module's star-imports.
    from Bio import SeqIO

    aa_mapping = {
        "A": "ALA", "C": "CYS", "D": "ASP", "E": "GLU", "F": "PHE",
        "G": "GLY", "H": "HIS", "I": "ILE", "K": "LYS", "L": "LEU",
        "M": "MET", "N": "ASN", "P": "PRO", "Q": "GLN", "R": "ARG",
        "S": "SER", "T": "THR", "V": "VAL", "W": "TRP", "Y": "TYR",
        'X': 'UNK',
    }

    sequences = [str(record.seq) for record in SeqIO.parse(seq_path, "fasta")]
    seq = sequences[-1]

    structure = PDBParser().get_structure("X", pdb_path)
    for chain in structure[0]:
        if chain.id != chain_id:
            continue
        # zip stops at the shorter of the two, so surplus residues (or
        # surplus sequence letters) are left untouched.
        for res, letter in zip(chain, seq):
            res.resname = aa_mapping[letter]
        # Chain ids are unique within a model; nothing left to look for.
        break

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(out_path)