|
|
import os |
|
|
import glob |
|
|
import pandas as pd |
|
|
import subprocess |
|
|
from difflib import SequenceMatcher |
|
|
|
|
|
from Bio import SeqIO |
|
|
from Bio.PDB import PDBParser, PDBIO, Chain, Select, is_aa |
|
|
from Bio.PDB.Polypeptide import PPBuilder |
|
|
|
|
|
from Bio.PDB import PDBParser |
|
|
from Bio.SeqUtils import seq1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_fasta_from_pdb(pdb_file):
    """Build a one-letter sequence string for each chain of a PDB file.

    Args:
        pdb_file: PDB file location handed to ``PDBParser.get_structure``.

    Returns:
        dict mapping each chain id to the concatenation of
        ``seq1(residue.get_resname())`` over the chain's residues, in
        iteration order. If the same chain id is met more than once while
        iterating ``structure.get_chains()``, the last one wins.
    """
    pdb_structure = PDBParser().get_structure("pdb", pdb_file)

    chain_sequences = {}
    for pdb_chain in pdb_structure.get_chains():
        # No residue filtering happens here: every residue the chain yields
        # contributes whatever seq1 returns for its residue name.
        one_letter_codes = [
            seq1(res.get_resname()) for res in pdb_chain.get_residues()
        ]
        chain_sequences[pdb_chain.id] = "".join(one_letter_codes)

    return chain_sequences
|
|
|
|
|
def parse_fasta(file):
    """Read a FASTA file and split every record's sequence on "/".

    Args:
        file: Path of the FASTA file, opened here in text read mode.

    Returns:
        dict keyed by the zero-based position of each record in the file;
        each value is the list of pieces produced by splitting that
        record's sequence string on "/".
    """
    with open(file, "r") as handle:
        records = SeqIO.parse(handle, "fasta")
        # The comprehension is evaluated inside the ``with`` block so the
        # records are fully consumed before the file is closed.
        return {
            index: str(record.seq).split("/")
            for index, record in enumerate(records)
        }
|
|
|
|
|
def renumber_pdb(input_pdb, output_pdb):
    """Write a copy of a PDB file with residues renumbered from 1 per chain.

    Each chain is rebuilt from copies of its residues. A copied residue keeps
    the first and third fields of the original residue id and gets its
    1-based position in the chain as the second field. The rebuilt chains
    then replace the originals and the structure is saved to ``output_pdb``.

    Args:
        input_pdb: PDB file to read.
        output_pdb: Destination the renumbered structure is saved to.

    Returns:
        dict mapping chain id to the number of residues in that chain. When
        several models contain the same chain id, the value from the last
        model iterated is kept.
    """
    structure = PDBParser().get_structure("protein", input_pdb)
    residues_per_chain = {}

    for model in structure:
        # Phase 1: build every replacement chain. The model is left untouched
        # while it is being iterated.
        replacements = []
        for original in model:
            # Temporary id; it is overwritten with the real chain id below.
            renumbered = Chain.Chain(original.id + "_renum")
            for position, residue in enumerate(original, start=1):
                clone = residue.copy()
                clone.id = (residue.id[0], position, residue.id[2])
                renumbered.add(clone)
            replacements.append((original, renumbered))
            residues_per_chain[original.id] = len(list(original))

        # Phase 2: swap each original chain for its renumbered copy.
        for original, renumbered in replacements:
            model.detach_child(original.id)
            renumbered.id = original.id
            model.add(renumbered)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_pdb)

    return residues_per_chain
|
|
|
|
|
def get_chain_dic(input_pdb):
    """Count, per chain, the residues that pass ``is_aa`` and have a CA atom.

    Args:
        input_pdb: PDB file to read.

    Returns:
        dict mapping chain id to the number of residues in that chain for
        which ``is_aa(residue)`` is true and ``residue.has_id('CA')`` holds.
        When several models contain the same chain id, the count from the
        last model iterated is kept.
    """
    structure = PDBParser().get_structure("protein", input_pdb)

    return {
        chain.id: sum(
            1 for residue in chain if is_aa(residue) and residue.has_id('CA')
        )
        for model in structure
        for chain in model
    }
|
|
|
|
|
|
|
|
def keep_backbone_atoms(input_file, output_file):
    """Save a copy of a PDB file containing only atoms named N, CA, C or O.

    Args:
        input_file: PDB file to read.
        output_file: Destination the filtered structure is saved to.
    """
    backbone_names = ("N", "CA", "C", "O")

    class BackboneSelect(Select):
        """Selector passed to ``PDBIO.save`` that accepts only the four names."""

        def accept_atom(self, atom):
            return atom.get_name() in backbone_names

    structure = PDBParser().get_structure("protein", input_file)

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_file, BackboneSelect())