import os |
import sys |
import pandas as pd |
import torch |
import numpy as np |
import matplotlib.pyplot as plt |
import seaborn as sns |
import linecache |
import re |
from Bio.PDB import PDBParser, PDBIO |
import math |
from Bio.PDB import PDBIO |
from Bio.PDB import PDBParser |
from Bio.PDB import Superimposer |
from Bio.PDB.vectors import calc_angle, calc_dihedral |
import Bio.PDB.vectors |
from Bio.PDB.DSSP import DSSP |
# Three-letter residue name -> one-letter amino-acid code.
# HSD / HSE / HSP share the code "H" with HIS (presumably force-field
# histidine variants -- confirm against the PDB files being read).
resdict = dict(
    ALA="A", ARG="R", ASN="N", ASP="D", CYS="C",
    GLN="Q", GLU="E", GLY="G",
    HIS="H", HSD="H", HSE="H", HSP="H",
    ILE="I", LYS="K", LEU="L", MET="M", PHE="F",
    PRO="P", SER="S", THR="T", TRP="W", TYR="Y", VAL="V",
)

# Module-level pulling-velocity value. get_one_force_record assigns a local
# variable of the same name read from file, so this constant is not modified
# by that function.
SMD_Vel = 0.0001
def collect_geo_of_backbone(chain):
    """Collect backbone bonds, bond angles and dihedrals along one chain.

    Residues whose name is not a key of ``resdict`` are skipped entirely.
    The first accepted residue only contributes its one-letter code and
    provides the "previous residue" atoms; every later accepted residue adds
    one entry to each geometry list, measured against the previously accepted
    residue. ``"AA"`` therefore ends up one element longer than the geometry
    lists.

    Args:
        chain: iterable of residues offering ``get_resname()`` and atom lookup
            via ``res["N"]``, ``res["CA"]``, ``res["C"]`` (presumably a
            Bio.PDB chain -- confirm against callers).

    Returns:
        dict of lists with keys ``"AA"``, ``"Bond_CA_N"``, ``"Bond_CA_C"``,
        ``"Bond_N_C1"``, ``"Angl_CA1_C1_N"``, ``"Angl_C1_N_CA"``,
        ``"Angl_N_CA_C"``, ``"Dihe_PHI"``, ``"Dihe_PSI"``, ``"Dihe_OME"``.
        The ``Angl_*`` values are scaled by 180/pi; the ``Dihe_*`` values are
        stored exactly as returned by ``calc_dihedral`` (no such scaling).
    """
    to_degrees = 180.0 / math.pi
    geo = {
        "AA": [],
        "Bond_CA_N": [], "Bond_CA_C": [], "Bond_N_C1": [],
        "Angl_CA1_C1_N": [], "Angl_C1_N_CA": [], "Angl_N_CA_C": [],
        "Dihe_PHI": [], "Dihe_PSI": [], "Dihe_OME": [],
    }
    # (N, CA, C) atoms of the previously accepted residue; None until one is seen.
    last_atoms = None

    for res in chain:
        if res.get_resname() not in resdict:
            continue
        geo["AA"].append(resdict[res.get_resname()])

        if last_atoms is None:
            # First accepted residue: nothing to measure against yet.
            last_atoms = (res["N"], res["CA"], res["C"])
            continue

        n_last, ca_last, c_last = last_atoms
        n1 = n_last.get_vector()
        ca1 = ca_last.get_vector()
        c1 = c_last.get_vector()

        c_atom = res["C"]
        n_atom = res["N"]
        ca_atom = res["CA"]
        c = c_atom.get_vector()
        n = n_atom.get_vector()
        ca = ca_atom.get_vector()

        # Atom - Atom is used for the bond entries (in Bio.PDB this yields
        # the inter-atomic distance -- confirm for the atom type in use).
        geo["Bond_CA_N"].append(ca_atom - n_atom)
        geo["Bond_CA_C"].append(ca_atom - c_atom)
        geo["Bond_N_C1"].append(n_atom - c_last)

        geo["Angl_CA1_C1_N"].append(calc_angle(ca1, c1, n) * to_degrees)
        geo["Angl_C1_N_CA"].append(calc_angle(c1, n, ca) * to_degrees)
        geo["Angl_N_CA_C"].append(calc_angle(n, ca, c) * to_degrees)

        # psi and omega involve the previous residue's atoms; phi the current.
        geo["Dihe_PHI"].append(calc_dihedral(c1, n, ca, c))
        geo["Dihe_PSI"].append(calc_dihedral(n1, ca1, c1, n))
        geo["Dihe_OME"].append(calc_dihedral(ca1, c1, n, ca))

        last_atoms = (n_atom, ca_atom, c_atom)

    return geo
def collect_multi_chain_AA_info(pdb_file):
    """Read a PDB file and gather the one-letter sequence of every chain.

    Args:
        pdb_file: path handed to ``PDBParser().get_structure``.

    Returns:
        dict with two entries: ``"Chain"`` is the list of chain ids in the
        order yielded by ``structure.get_chains()``, and ``"AA"`` maps each
        chain id to the ``"AA"`` list produced by ``collect_geo_of_backbone``
        for that chain.
    """
    structure = PDBParser().get_structure("sample", pdb_file)

    chain_ids = []
    seq_by_chain = {}
    for chain in structure.get_chains():
        cid = chain.get_id()
        chain_ids.append(cid)
        # Only the residue codes are kept; the geometry lists are discarded.
        seq_by_chain[cid] = collect_geo_of_backbone(chain)["AA"]

    return {"Chain": chain_ids, "AA": seq_by_chain}
def get_one_force_record(ii, resu_file_name_list):
    """Load displacement, force and pull data for record ``ii``.

    Args:
        ii: index into the ``'PDB_ID'`` and ``'Path'`` entries.
        resu_file_name_list: mapping (dict or DataFrame-like) with
            ``'PDB_ID'`` and ``'Path'`` entries indexable by ``ii``.

    Returns:
        Tuple ``(disp_data, force_data, pdb_id, pull_data)``. ``disp_data``
        and ``force_data`` are columns 1 and 7 of ``smd_resu.dat``.
        ``pull_data`` is column 0 multiplied by the number found in the third
        whitespace-separated field on line 4 of
        ``box_dimension_after_eq.dat``.
    """
    pdb_id = resu_file_name_list['PDB_ID'][ii]

    smd_table = np.genfromtxt(
        resu_file_name_list['Path'][ii] + '/1_working_dir/collect_results/smd_resu.dat'
    )
    step_data, disp_data, force_data = (smd_table[:, col] for col in (0, 1, 7))

    settings_path = resu_file_name_list['Path'][ii] + '/1_working_dir/box_dimension_after_eq.dat'
    # Local name on purpose: the module-level SMD_Vel is left untouched.
    pulling_speed = float(linecache.getline(settings_path, 4).split()[2])

    return disp_data, force_data, pdb_id, pulling_speed * step_data
def get_one_AA_record(ii, resu_file_name_list):
    """Return the one-letter sequence of the first chain of record ``ii``.

    The structure is read from
    ``<Path[ii]>/1_working_dir/TestProt_chain_0_after_psf.pdb`` through
    ``collect_multi_chain_AA_info``; only the first listed chain is used.

    Args:
        ii: index into ``resu_file_name_list['Path']``.
        resu_file_name_list: mapping with a ``'Path'`` entry indexable by ``ii``.

    Returns:
        The residue codes of the first chain joined into a single string.
    """
    record_dir = resu_file_name_list['Path'][ii]
    chains = collect_multi_chain_AA_info(
        record_dir + '/1_working_dir/TestProt_chain_0_after_psf.pdb'
    )
    first_chain_id = chains["Chain"][0]
    return ''.join(chains["AA"][first_chain_id])
def conv_one_record(force_data, kernel_size):
    """Smooth a 1-D signal with an equal-weight moving average.

    Args:
        force_data: 1-D sequence of values to smooth.
        kernel_size: number of samples in the averaging window.

    Returns:
        The result of ``np.convolve(..., mode='same')`` with ``kernel_size``
        weights of ``1 / kernel_size`` each.
    """
    box_weights = np.ones(kernel_size) / kernel_size
    return np.convolve(force_data, box_weights, mode='same')
from math import factorial |
from scipy.ndimage.filters import uniform_filter1d |
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    """Smooth (or differentiate) a 1-D signal with a Savitzky-Golay filter.

    A polynomial of degree ``order`` is least-squares fitted inside a sliding
    window of ``window_size`` samples; the filter coefficients are taken from
    the pseudo-inverse of the window's design matrix.

    Args:
        y: 1-D array-like of samples (converted with ``np.asarray``).
        window_size: window length; must be a positive odd integer and at
            least ``order + 2``.
        order: polynomial degree used for the local fit.
        deriv: order of the derivative to return (0 means plain smoothing).
        rate: scale applied as ``rate**deriv`` to the coefficients.

    Returns:
        numpy array with the same length as ``y``.

    Raises:
        ValueError: if ``window_size`` or ``order`` cannot be parsed as int.
        TypeError: if ``window_size`` is not a positive odd number or is too
            small for ``order`` (exception types kept for existing callers).
    """
    try:
        window_size = abs(int(window_size))
        order = abs(int(order))
    except ValueError as err:
        raise ValueError("window_size and order have to be of type int") from err
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")

    # Accept lists and other array-likes, not only ndarrays.
    y = np.asarray(y)
    half_window = (window_size - 1) // 2

    # Design matrix: row per window offset k, column per power i (k**i).
    # A plain ndarray is used because np.mat was removed in NumPy 2.0.
    design = np.array(
        [[k ** i for i in range(order + 1)]
         for k in range(-half_window, half_window + 1)],
        dtype=float,
    )
    coeffs = np.linalg.pinv(design)[deriv] * rate ** deriv * factorial(deriv)

    # Pad both ends with values mirrored about the end points so the 'valid'
    # convolution below returns exactly len(y) samples.
    firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
    lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
    padded = np.concatenate((firstvals, y, lastvals))

    return np.convolve(coeffs[::-1], padded, mode='valid')
def read_gap_values_from_dat(file):
    """Read the initial and final gap values from a ``.dat`` file.

    Args:
        file: path of the file, read through ``linecache``.

    Returns:
        Tuple ``(ini_gap, fin_gap)``: the third whitespace-separated field of
        line 2 and of line 3 respectively, each converted to float.
    """
    raw_lines = [linecache.getline(file, line_no) for line_no in (2, 3)]
    ini_gap, fin_gap = (float(text.split()[2]) for text in raw_lines)
    return ini_gap, fin_gap
def read_one_array_from_df(one_record):
    """Convert a string of numbers separated by single spaces into an array.

    The string is split on the literal ``" "`` character, so consecutive
    spaces yield empty tokens that ``float`` rejects.

    Args:
        one_record: text such as ``"1.0 2.5 3"``.

    Returns:
        1-D numpy array of floats.
    """
    values = [float(token) for token in one_record.split(" ")]
    return np.array(values)
def read_string_find_max(reco):
    """Return the largest value in a space-separated numeric string.

    Args:
        reco: text parsed by ``read_one_array_from_df``.

    Returns:
        The maximum of the parsed array, as given by ``np.amax``.
    """
    return np.amax(read_one_array_from_df(reco))
def cal_seq_end_gap(x):
    """Compute the gap along a record from its position data.

    Args:
        x: mapping with ``'posi_data'`` (array supporting element-wise
            subtraction) and ``'ini_gap'`` (presumably one DataFrame row --
            confirm against callers).

    Returns:
        ``ini_gap`` plus the change of ``posi_data`` relative to its first
        element.
    """
    positions = x['posi_data']
    travelled = positions - positions[0]
    return x['ini_gap'] + travelled
def cal_pull_end_gap(x):
    """Compute the gap along a record from its pull data.

    Args:
        x: mapping with ``'pull_data'`` and ``'ini_gap'`` entries.

    Returns:
        ``ini_gap`` added to ``pull_data``.
    """
    pulled = x['pull_data']
    return x['ini_gap'] + pulled
def simplify_NormPull_FORCEnF_rec(n_fold,this_seq_len,this_n_PullGap_arr,this_Force_arr):
    """Resample a force record onto a uniform grid of normalized pull gaps.

    The grid holds ``this_seq_len * n_fold`` evenly spaced points starting at
    0, followed by the end point 1. A grid point below the first recorded
    pull gap gets force 0; any other grid point takes the force of the
    recorded sample whose pull gap is closest to it.

    Args:
        n_fold: number of grid points per sequence position.
        this_seq_len: sequence length.
        this_n_PullGap_arr: numpy array of recorded pull gaps.
        this_Force_arr: forces indexed like ``this_n_PullGap_arr``.

    Returns:
        dict with ``'sample_NormPullGap'`` (the grid) and ``'smaple_FORCE'``
        (the sampled forces). The second key is spelled exactly as callers
        receive it.
    """
    n_intervals = this_seq_len * n_fold
    grid = [1. / n_intervals * (idx + 0) for idx in range(n_intervals)]
    grid.append(1.)

    def force_at(gap):
        # Before the first recorded sample there is no force.
        if gap < this_n_PullGap_arr[0]:
            return 0.
        nearest = np.argmin(np.abs(this_n_PullGap_arr - gap))
        return this_Force_arr[nearest]

    sampled_forces = [force_at(gap) for gap in grid]

    return {
        'sample_NormPullGap': np.array(grid),
        'smaple_FORCE': np.array(sampled_forces),
    }
def read_input_model_A(file_path):
    """Parse every bracketed group of numbers in a text file.

    Each ``[...]`` group that contains no nested brackets is handed to
    ``np.loadtxt`` as one line of input.

    Args:
        file_path: path of the text file to read.

    Returns:
        The array produced by ``np.loadtxt`` from the bracket contents.
    """
    with open(file_path, 'r') as handle:
        content = handle.read()
    rows = [match.group(1) for match in re.finditer(r'\[([^][]+)\]', content)]
    return np.loadtxt(rows)
def read_input_model_B(file_path):
    """Parse the first bracketed group of numbers in a text file.

    Only the first ``[...]`` group without nested brackets is used; it may
    span several lines.

    Args:
        file_path: path of the text file to read.

    Returns:
        The array produced by ``np.loadtxt`` from the flattened group.

    Raises:
        IndexError: if the file contains no bracketed group.
    """
    with open(file_path, 'r') as f:
        txt = f.read()
    groups = re.findall(r'\[([^][]+)\]', txt)
    if not groups:
        raise IndexError(f"no bracketed '[...]' group found in {file_path!r}")
    # Turn line breaks into separators rather than deleting them: deleting
    # would fuse two numbers separated only by a newline ("2\n3" -> "23").
    flat = groups[0].replace('\n', ' ')
    return np.loadtxt([flat])
def read_one_input_arr_from_txt(file_path):
    """Read one numeric array from the first ``[...]`` group of a text file.

    The group may be wrapped over several lines; groups after the first are
    ignored.

    Args:
        file_path: path of the text file to read.

    Returns:
        The array produced by ``np.loadtxt`` from the flattened group.

    Raises:
        IndexError: if the file contains no bracketed group.
    """
    with open(file_path, 'r') as f:
        txt = f.read()
    matches = re.findall(r'\[([^][]+)\]', txt)
    if not matches:
        raise IndexError(f"no bracketed '[...]' group found in {file_path!r}")
    # Replace each newline with a space instead of removing it, so that two
    # numbers separated only by a line break stay two numbers.
    single_line = ' '.join(matches[0].split('\n'))
    return np.loadtxt([single_line])
def recover_input_for_model_B_ver2(file_path, seq_len):
    """Return the leading ``seq_len + 1`` values of the array stored in a file.

    Args:
        file_path: text file parsed by ``read_one_input_arr_from_txt``.
        seq_len: sequence length; one extra element beyond it is kept.

    Returns:
        The slice ``[0 : seq_len + 1]`` of the parsed array.
    """
    full_arr = read_one_input_arr_from_txt(file_path)
    return full_arr[:seq_len + 1]
def recover_input_for_model_B_ver3(file_path, seq_len):
    """Build a ``seq_len + 1`` array whose first slot is zero.

    Slot 0 stays at 0; slots 1..seq_len receive the first ``seq_len`` values
    of the array parsed by ``read_one_input_arr_from_txt``.
    NOTE(review): no length check is made on the parsed array.

    Args:
        file_path: text file parsed by ``read_one_input_arr_from_txt``.
        seq_len: number of values copied from the parsed array.

    Returns:
        numpy array of length ``seq_len + 1``.
    """
    source = read_one_input_arr_from_txt(file_path)
    shifted = np.zeros(seq_len + 1)
    shifted[1:seq_len + 1] = source[:seq_len]
    return shifted