LigUnity / HGNN /read_fasta.py
Irwiny123's picture
提交LigUnity初始代码
94391f2
import os, re
import prody as pr
from tqdm import tqdm
from multiprocessing import Pool
# import subprocess
# os.environ["BABEL_LIBDIR"] = "/home/shenchao/.conda/envs/my2/lib/openbabel/3.1.0"
def write_file(output_file, outline):
buffer = open(output_file, 'w')
buffer.write(outline)
buffer.close()
def lig_rename(infile, outfile):
##some peptides may impede the generation of pocket, so rename the ligname first.
lines = open(infile, 'r').readlines()
newlines = []
for line in lines:
if re.search(r'^HETATM|^ATOM', line):
newlines.append(line[:17] + "LIG" + line[20:])
else:
newlines.append(line)
write_file(outfile, ''.join(newlines))
def check_mol(infile, outfile):
# Some metals may have the same ID as ligand, thus making ligand included in the pocket.
os.system("cat %s | sed '/LIG/d' > %s" % (infile, outfile))
def extract_pocket(protpath,
ligpath,
cutoff=5.0,
protname=None,
ligname=None,
pdb_pocket_file=None,
workdir='.'):
"""
protpath: the path of protein file (.pdb).
ligpath: the path of ligand file (.sdf|.mol2|.pdb).
cutoff: the distance range within the ligand to determine the pocket.
protname: the name of the protein.
ligname: the name of the ligand.
workdir: working directory.
"""
if protname is None:
protname = os.path.basename(protpath).split('.')[0]
if ligname is None:
ligname = os.path.basename(ligpath).split('.')[0]
if not re.search(r'.pdb$', ligpath):
os.system(f"obabel {ligpath} -O {workdir}/{ligname}.pdb")
else:
os.system(f"cp {ligpath} {workdir}/{ligname}.pdb")
xprot = pr.parsePDB(protpath)
# xlig = pr.parsePDB("%s/%s.pdb"%(workdir, ligname))
# if (xlig.getResnames() == xlig.getResnames()[0]).all():
# lresname = xlig.getResnames()[0]
# else:
lig_rename("%s/%s.pdb" % (workdir, ligname), "%s/%s2.pdb" % (workdir, ligname))
os.remove("%s/%s.pdb" % (workdir, ligname))
os.rename("%s/%s2.pdb" % (workdir, ligname), "%s/%s.pdb" % (workdir, ligname))
xlig = pr.parsePDB("%s/%s.pdb" % (workdir, ligname))
lresname = xlig.getResnames()[0]
xcom = xlig + xprot
# select ONLY atoms that belong to the protein
ret = xcom.select(f'same residue as exwithin %s of resname %s' % (cutoff, lresname))
pr.writePDB("%s/%s_pocket_%s_temp.pdb" % (workdir, protname, cutoff), ret)
# ret = pr.parsePDB("%s/%s_pocket_%s.pdb"%(workdir, protname, cutoff))
check_mol("%s/%s_pocket_%s_temp.pdb" % (workdir, protname, cutoff), pdb_pocket_file)
os.remove("%s/%s_pocket_%s_temp.pdb" % (workdir, protname, cutoff))
def get_fasta_seq(fasta_file):
with open(fasta_file) as f:
lines = []
for line in f.readlines():
lines.append(line.strip())
fasta = "".join(lines[1:])
return fasta
def read_fasta_from_protein(pdb_file, lig_file, target_id="test", cutoff=5.0, pdb_pocket_file="test_pocket.pdb", fasta_pocket_file="test_pocket.fasta"):
if not os.path.exists(pdb_file):
return
try:
extract_pocket(pdb_file,
lig_file,
cutoff=cutoff,
protname=target_id,
pdb_pocket_file=pdb_pocket_file,
ligname=f"{target_id}_ligand")
except Exception as e:
print(e)
return
os.system(f"./pdb2fasta {pdb_pocket_file} > {fasta_pocket_file}")
return get_fasta_seq(fasta_pocket_file)
def read_fasta_from_pocket(pocket_pdb_file, fasta_pocket_file="test_pocket.fasta"):
os.system(f"./pdb2fasta {pocket_pdb_file} > {fasta_pocket_file}")
return get_fasta_seq(fasta_pocket_file)