# Simon Duerr
# initial commit
# 227b78b
# raw
# history blame
# No virus
# 2.26 kB
import pandas as pd
import numpy as np
import glob
import random
import numpy as np
import json
def softmax(x, T):
    """Return the temperature-scaled softmax of ``x`` over its last axis.

    Args:
        x: Array-like of scores; normalization runs over the last axis.
        T: Temperature; the scores are divided by it before exponentiation.

    Returns:
        A ``numpy.ndarray`` shaped like ``x`` whose entries along the last
        axis sum to 1.
    """
    scaled = np.asarray(x) / T
    # Subtracting the per-row maximum does not change the mathematical result
    # but keeps np.exp from overflowing to inf (which would turn the ratio
    # into nan) when the scores are large.
    shifted = scaled - np.max(scaled, axis=-1, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=-1, keepdims=True)
def parse_pssm(path):
    """Read the numeric rows of a PSSM text file into a float array.

    The file is loaded with ``pandas.read_csv`` after skipping its first two
    lines; the next line is consumed by pandas as the header. For every
    remaining row, the first four characters of the first column's text are
    discarded and the rest is split on whitespace and converted to floats.

    Args:
        path: Location of the PSSM file.

    Returns:
        ``numpy.ndarray`` built from the per-row float lists (one row per
        parsed line).
    """
    table = pd.read_csv(path, skiprows=2)
    rows = [
        # record[0] is the raw text of the line; [4:] drops its 4-char prefix.
        [float(token) for token in record[0][4:].split()]
        for record in table.values
    ]
    return np.array(rows)
# Parse the PSSM file for the design target into a float matrix.
np_lines = parse_pssm('/home/swang523/RLcage/capsid/monomersfordesign/8-16-21/pssm_rainity_final_8-16-21_int/build_0.2089_0.98_0.4653_19_2.00_0.005745.pssm')

# Column order expected downstream (21 letters, 'X' last) versus the column
# order of the parsed PSSM (20 letters).
mpnn_alphabet = 'ACDEFGHIKLMNPQRSTVWYX'
input_alphabet = 'ARNDCQEGHILKMFPSTWYV'

# 20x21 one-hot matrix: entry [i, j] is 1.0 exactly when input letter i equals
# MPNN letter j. No input letter matches 'X', so that column stays all zero.
input_letters = np.array(list(input_alphabet))
mpnn_letters = np.array(list(mpnn_alphabet))
permutation_matrix = (input_letters[:, None] == mpnn_letters[None, :]).astype(float)

# Reorder the first 20 columns and the next 20 columns of the parsed file into
# MPNN order (presumably log-odds and probabilities respectively, going by the
# variable names -- confirm against the file format).
pssm_log_odds = np.matmul(np_lines[:, :20], permutation_matrix)
pssm_probs = np.matmul(np_lines[:, 20:40], permutation_matrix)

# Row vector of shape (1, 21) that is 1 only in the last ('X') column.
X_mask = np.concatenate([np.zeros([1, 20]), np.ones([1, 1])], axis=-1)
def softmax(x, T):
    """Return the temperature-scaled softmax of ``x`` over its last axis.

    This redefinition replaces the identically named function declared
    earlier in the file and is the one in effect for the code below.

    Args:
        x: Array-like of scores; normalization runs over the last axis.
        T: Temperature; the scores are divided by it before exponentiation.

    Returns:
        A ``numpy.ndarray`` shaped like ``x`` whose entries along the last
        axis sum to 1.
    """
    scaled = np.asarray(x) / T
    # Shift by the per-row maximum: mathematically a no-op, but it prevents
    # np.exp from overflowing to inf (and the ratio from becoming nan).
    shifted = scaled - np.max(scaled, axis=-1, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=-1, keepdims=True)
# Load parsed PDBs (one JSON record per line):
with open('/home/justas/projects/cages/parsed/test.jsonl', 'r') as json_file:
    json_list = list(json_file)

# These two values depend only on the PSSM, not on the record or chain, so
# they are computed once instead of once per chain.
# Subtracting X_mask*1e8 pushes the last ('X') column far below the others so
# the softmax assigns it (numerically) zero probability.
# PSSM like, [length, 21] such that sum over the last dimension adds up to 1.0
pssm_bias = softmax(pssm_log_odds - X_mask * 1e8, 1.0).tolist()
pssm_log_odds_list = pssm_log_odds.tolist()

my_dict = {}
for json_str in json_list:
    # A blank line (e.g. a trailing newline at end of file) is not a record.
    if not json_str.strip():
        continue
    result = json.loads(json_str)
    # Map each chain ID (last character of the key) to its 'seq_chain*' key so
    # the per-chain sequence can be looked up without assuming chain 'A'.
    chain_keys = {key[-1:]: key for key in result if key[:9] == 'seq_chain'}
    all_chain_list = list(chain_keys)
    pssm_dict = {}
    for chain in all_chain_list:
        pssm_dict[chain] = {
            # a number between 0.0 and 1.0 specifying how much attention put
            # to PSSM, can be adjusted later as a flag; one entry per residue
            # of THIS chain (previously always sized from chain A).
            'pssm_coef': np.ones(len(result[chain_keys[chain]])).tolist(),
            'pssm_bias': pssm_bias,
            'pssm_log_odds': pssm_log_odds_list,
        }
    my_dict[result['name']] = pssm_dict

# Write output to:
with open('/home/justas/projects/lab_github/mpnn/data/pssm_dict.jsonl', 'w') as f:
    f.write(json.dumps(my_dict) + '\n')