|
import datetime
import json
import os
import pickle
import random
import string
import sys
import time
import urllib
import urllib.parse
import warnings

import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
from Bio.SeqUtils import IsoelectricPoint as IP
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from qiskit_machine_learning.algorithms import VQC, VQR
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import rdMolTransforms
from scipy.constants import Boltzmann
from scipy.constants import e, epsilon_0
from tensorflow.keras.models import load_model
|
|
|
|
|
|
|
# Suppress all Python warnings module-wide for cleaner console output.
warnings.filterwarnings('ignore')
|
|
|
# Static asset layout, rooted in an 'asset' folder next to this file.
asset_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'asset')
image_dir = os.path.join(asset_dir, 'img')              # images (app icon below)
model_dir = os.path.join(asset_dir, 'model')            # pickled / Keras models
qmodel_dir = os.path.join(model_dir, 'Quantum Model')   # serialized VQC/VQR models
label_dir = os.path.join(asset_dir, 'label')            # JSON label mappings
data_dir = os.path.join(asset_dir, 'data')              # not referenced in visible code
json_dir = os.path.join(asset_dir, 'json')              # not referenced in visible code

icon_img = os.path.join(image_dir, 'kaede_kayano.jpg')  # application icon image
|
|
|
def get_base_path():
    """Return the absolute path of the directory containing this module."""
    here = os.path.abspath(__file__)
    return os.path.dirname(here)
|
|
|
def ml_dock(data):
    """Score one feature vector with the pickled linear-regression docking model.

    Loads the model from model_dir on every call and returns the first
    (only) prediction.
    """
    model_file = os.path.join(model_dir, 'Linear_Regression_Model.pkl')
    with open(model_file, "rb") as fh:
        regressor = pickle.load(fh)
    return regressor.predict([data])[0]
|
|
|
def qml_dock(data, sampler=None):
    """Score one feature vector with the serialized VQR quantum scoring model.

    Args:
        data: a single feature vector (list of numbers).
        sampler: accepted for interface compatibility. NOTE: the original
            implementation had two byte-identical branches for sampler
            present/absent, so the value is currently unused.

    Returns:
        The scalar prediction, or None on any failure (best-effort contract
        preserved from the original, but with Exception instead of a bare
        except, and `is None` instead of `== None`).
    """
    try:
        vqr = VQR.load(os.path.join(qmodel_dir, "VQR_quantum regression-based scoring function"))
        prediction = vqr.predict([data])
        return prediction[0][0]
    except Exception:
        return None
|
|
|
def generate_random_code(length):
    """Return a random string of `length` ASCII letters and digits."""
    pool = string.ascii_letters + string.digits
    return ''.join(random.choice(pool) for _ in range(length))
|
|
|
def generate_filename_with_timestamp_and_random(condition="classic"):
    """Build a unique file stem '<DDMMYYYY_HHMMSS>_<15 random chars>'.

    Any `condition` other than "classic" appends a '_quantum' suffix.
    """
    stamp = datetime.datetime.now().strftime("%d%m%Y_%H%M%S")
    token = generate_random_code(15)
    base = f"{stamp}_{token}"
    if condition == "classic":
        return base
    return f"{base}_quantum"
|
|
|
def create_folder(folder_path):
    """Create `folder_path` (including parents); tolerate an existing folder."""
    try:
        os.makedirs(folder_path)
    except FileExistsError:
        print(f"Folder '{folder_path}' already exists.")
    else:
        print(f"Folder '{folder_path}' created successfully.")
|
|
|
def minmaxint(val, max_val):
    """Sanitize `val` as an int in range: non-ints and negatives become 1,
    values above `max_val` are clamped to `max_val`, everything else passes
    through unchanged (note: 0 is allowed through as-is).
    """
    if not isinstance(val, int):
        return 1
    if val < 0:
        return 1
    if val > max_val:
        return max_val
    return val
|
|
|
def download_file(url, save_as):
    """Stream `url` to the local path `save_as` in 1 MB chunks.

    Returns `save_as`. FIX: the Response is now used as a context manager so
    the underlying connection is released (previously leaked).

    SECURITY NOTE: verify=False disables TLS certificate checking — kept for
    backward compatibility with the project's self-hosted endpoints, but
    review before pointing this at untrusted hosts.
    """
    with requests.get(url, stream=True, verify=False, timeout=6000) as response:
        with open(save_as, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024000):
                if chunk:
                    f.write(chunk)
                    f.flush()
    return save_as
|
|
|
def process_text_for_url(text_input):
    """URL-encode `text_input` for use as a query-string value.

    Args:
        text_input (str): raw text to encode.

    Returns:
        str: the text percent-encoded for a URL (spaces become '+').

    FIX: the previous implementation first replaced spaces with '%20' and
    then called quote_plus, which re-encoded the '%' and produced a
    double-encoded '%2520'. A single quote_plus pass encodes every reserved
    character exactly once.
    """
    return urllib.parse.quote_plus(text_input)
|
|
|
def request_url(url_input, text_input):
    """GET `url_input` with `text_input` passed as the `string_input` query param.

    Returns the response body text on HTTP 200, otherwise the sentinel
    string "Gagal" ("failed").
    """
    encoded_value = process_text_for_url(text_input)
    full_url = url_input + "?string_input=" + encoded_value
    print(full_url)

    response = requests.get(full_url)
    if response.status_code != 200:
        return "Gagal"
    return response.text
|
|
|
def export_string_to_text_file(string, filename):
    """Write `string` to `filename`, replacing any existing content.

    (The parameter name `string` shadows the stdlib module of the same name;
    it is kept unchanged for caller compatibility.)
    """
    with open(filename, 'w') as out_file:
        out_file.write(string)
|
|
|
# Import-time banner: printed as soon as this module is loaded.
print("Starting App...")
|
|
|
class ReVa: |
|
def preprocessing_begin(seq): |
|
seq = str(seq).upper() |
|
delete_char = "BJOUXZ\n\t 1234567890*&^%$#@!~()[];:',.<><?/" |
|
for i in range(len(delete_char)): |
|
seq = seq.replace(delete_char[i],'') |
|
return seq |
|
|
|
|
|
    def __init__(self, sequence, base_path, target_path, n_receptor, n_adjuvant, blast_activate=False, llm_url="", alphafold_url=""):
        """Set up the classical vaccine-design pipeline.

        Cleans the input protein sequence, creates the output folder, then
        loads the pickled epitope classifiers and the JSON label mappings
        from the asset directories. Every load failure is printed rather
        than raised — see the NOTE below about follow-on NameErrors.
        """
        self.sequence = ReVa.preprocessing_begin(sequence)
        self.base_path = base_path
        self.blast_activate = blast_activate  # gate for remote BLASTp similarity checks
        self.target_path = target_path
        self.n_receptor = n_receptor
        self.n_adjuvant = n_adjuvant
        self.llm_url = llm_url                # optional LLM review endpoint
        self.alphafold_url = alphafold_url    # optional structure-modelling endpoint
        create_folder(self.target_path)

        # 20 standard amino acids; basis of the one-hot encoding.
        self.alphabet = 'ACDEFGHIKLMNPQRSTVWY'
        self.num_features = len(self.alphabet)

        # B-cell epitope classifier (pickled model).
        try:
            model_path = 'BPepTree.pkl'
            self.loaded_Bmodel = ReVa.load_pickle_model(self.base_path, model_path)
        except:
            # NOTE(review): bare except; on failure self.loaded_Bmodel stays
            # unset and predict_epitope will raise AttributeError later.
            print("Error Load Model Epitope")

        # T-cell epitope classifier.
        try:
            model_path = 'TPepTree.pkl'
            self.loaded_Tmodel = ReVa.load_pickle_model(self.base_path, model_path)
        except:
            print("Error")

        # NOTE(review): in each label block below, a failed load only prints;
        # the mapping line that follows then raises NameError (label_dict
        # undefined) or silently reuses the previous file's mapping.
        try:
            with open(os.path.join(label_dir, 'allergenicity_label_mapping.json'), 'r') as f:
                label_dict = json.load(f)
        except:
            print("Error")

        self.reverse_label_mapping_allergen = {v: k for k, v in label_dict.items()}
        self.seq_length_allergen = 4857  # fixed input length of the allergenicity model

        try:
            with open(os.path.join(label_dir,'toxin_label_mapping.json'), 'r') as f:
                label_dict = json.load(f)
        except:
            print("Error")

        self.reverse_label_mapping_toxin = {v: k for k, v in label_dict.items()}
        self.seq_length_toxin = 35  # fixed input length of the toxin model

        try:
            with open(os.path.join(label_dir, 'antigenicity_label_mapping.json'), 'r') as f:
                label_dict = json.load(f)
        except:
            print("Error")

        self.reverse_label_mapping_antigen = {v: k for k, v in label_dict.items()}
        self.seq_length_antigen = 83  # fixed input length of the antigenicity model

        # Index -> label mappings for the two epitope classifiers.
        try:
            with open(os.path.join(label_dir,'BPepTree_label.json'), 'r') as f:
                label_dict = json.load(f)
                self.Blabel = ReVa.invert_dict(label_dict)
        except:
            print("Error")

        try:
            with open(os.path.join(label_dir,'TPepTree_label.json'), 'r') as f:
                label_dict = json.load(f)
                self.Tlabel = ReVa.invert_dict(label_dict)
        except:
            print("Error")
|
|
|
|
|
def load_pickle_model(base_path, model_path): |
|
with open(os.path.join(model_dir, model_path), 'rb') as f: |
|
model = pickle.load(f) |
|
return model |
|
|
|
def combine_lists(list1, list2): |
|
result = [] |
|
current_group = "" |
|
|
|
for i in range(len(list1)): |
|
if list2[i] == 'E': |
|
current_group += list1[i] |
|
else: |
|
if current_group: |
|
result.append(current_group) |
|
current_group = "" |
|
result.append(list1[i]) |
|
|
|
if current_group: |
|
result.append(current_group) |
|
|
|
return result |
|
|
|
def engelman_ges_scale(aa): |
|
scale = { |
|
'A': 1.60, 'C': 2.00, 'D': -9.20, 'E': -8.20, 'F': 3.70, |
|
'G': 1.00, 'H': -3.00, 'I': 3.10, 'K': -8.80, 'L': 2.80, |
|
'M': 3.40, 'N': -4.80, 'P': -0.20, 'Q': -4.10, 'R': -12.3, |
|
'S': 0.60, 'T': 1.20, 'V': 2.60, 'W': 1.90, 'Y': -0.70 |
|
} |
|
return scale.get(aa, 0.0) |
|
|
|
def get_position(aa): |
|
pos = [i+1 for i in range(0, len(aa))] |
|
return pos |
|
|
|
def one_hot_encoding(self, sequence): |
|
encoding = [] |
|
for char in sequence: |
|
vector = [0] * self.num_features |
|
if char in self.alphabet: |
|
index = self.alphabet.index(char) |
|
vector[index] = 1 |
|
encoding.append(vector) |
|
return encoding |
|
|
|
def extraction_feature(aa): |
|
pos = ReVa.get_position(aa) |
|
scale = [ReVa.engelman_ges_scale(aa[i]) for i in range(len(aa))] |
|
|
|
res = [[pos[i], scale[i], len(aa)] for i in range(len(pos))] |
|
|
|
return res |
|
|
|
    def predict_label_and_probability_allergenicity(self, sequence):
        """Predict allergenicity (label, raw probability array) for one peptide.

        Loads the Keras model on every call (no caching). Returns None
        implicitly if the final prediction step fails.
        """
        try:
            model_path = 'allerginicity.h5'  # (sic) filename spelled this way on disk
            model = load_model(os.path.join(model_dir, model_path))
        except:
            # NOTE(review): bare except — on failure `model` stays undefined
            # and the prediction block below raises NameError.
            print("Error")

        try:
            # Truncate to the model's fixed input length, one-hot encode,
            # then pad with all-zero rows up to that length.
            sequence = sequence[:self.seq_length_allergen]
            sequence = [ReVa.one_hot_encoding(self, seq) for seq in [sequence]]
            sequence = [seq + [[0] * self.num_features] * (self.seq_length_allergen - len(seq)) for seq in sequence]
            sequence = np.array(sequence)
        except:
            print("Error")

        try:
            prediction = model.predict(sequence)[0]
            # Binary threshold at 0.5, then map the index back to label text.
            predicted_label_index = 1 if prediction > 0.5 else 0
            predicted_label = self.reverse_label_mapping_allergen[predicted_label_index]

            return predicted_label, prediction
        except:
            print("Error")
|
|
|
|
|
    def predict_label_and_probability_toxin(self, sequence):
        """Predict toxicity (label, raw probability array) for one peptide.

        Loads the Keras model on every call. Returns None implicitly if the
        prediction step fails.
        """
        try:
            model_path = 'toxin.h5'
            model = load_model(os.path.join(model_dir, model_path))
        except:
            # NOTE(review): bare except — `model` undefined below on failure.
            print("Error")

        # Truncate to the model's fixed input length, one-hot encode, pad
        # with all-zero rows (unlike the allergenicity variant this is not
        # wrapped in a try block).
        sequence = sequence[:self.seq_length_toxin]
        sequence = [ReVa.one_hot_encoding(self, seq) for seq in [sequence]]
        sequence = [seq + [[0] * self.num_features] * (self.seq_length_toxin - len(seq)) for seq in sequence]
        sequence = np.array(sequence)

        try:
            prediction = model.predict(sequence)[0]
            # Binary threshold at 0.5, then map index back to label text.
            predicted_label_index = 1 if prediction > 0.5 else 0
            predicted_label = self.reverse_label_mapping_toxin[predicted_label_index]

            return predicted_label, prediction
        except:
            print("Error")
|
|
|
|
|
    def predict_label_and_probability_antigenicity(self, sequence):
        """Predict antigenicity (label, raw probability array) for one peptide.

        Loads the Keras model on every call. Returns None implicitly if the
        prediction step fails.
        """
        try:
            model_path = 'antigenicity.h5'
            model = load_model(os.path.join(model_dir, model_path))
        except:
            # NOTE(review): bare except — `model` undefined below on failure.
            print("Error")

        # Truncate to the model's fixed input length, one-hot encode, pad
        # with all-zero rows.
        sequence = sequence[:self.seq_length_antigen]
        sequence = [ReVa.one_hot_encoding(self, seq) for seq in [sequence]]
        sequence = [seq + [[0] * self.num_features] * (self.seq_length_antigen - len(seq)) for seq in sequence]
        sequence = np.array(sequence)

        try:
            prediction = model.predict(sequence)[0]
            # Binary threshold at 0.5, then map index back to label text.
            predicted_label_index = 1 if prediction > 0.5 else 0
            predicted_label = self.reverse_label_mapping_antigen[predicted_label_index]

            return predicted_label, prediction
        except:
            print("Error")
|
|
|
|
|
def invert_dict(dictionary): |
|
inverted_dict = {value: key for key, value in dictionary.items()} |
|
return inverted_dict |
|
|
|
def process_epitope(input_list): |
|
output_list = [] |
|
current_group = [] |
|
|
|
for item in input_list: |
|
if item == 'E': |
|
current_group.append(item) |
|
else: |
|
if current_group: |
|
output_list.append(''.join(current_group)) |
|
current_group = [] |
|
output_list.append(item) |
|
|
|
if current_group: |
|
output_list.append(''.join(current_group)) |
|
|
|
return output_list |
|
|
|
def filter_epitope(data): |
|
filtered_seq = [] |
|
filtered_label = [] |
|
|
|
for i in range(len(data['seq'])): |
|
if data['label'][i] != '.': |
|
filtered_seq.append(data['seq'][i]) |
|
filtered_label.append(data['label'][i]) |
|
|
|
filtered_data = {'seq': filtered_seq, 'label': filtered_label} |
|
return filtered_data |
|
|
|
def string_to_list(input_string): |
|
return list(input_string) |
|
|
|
def calculate_hydrophobicity(sequence): |
|
hydrophobic_residues = ['A', 'I', 'L', 'M', 'F', 'V', 'W', 'Y'] |
|
hydrophilic_residues = ['R', 'N', 'C', 'Q', 'E', 'G', 'H', 'K', 'S', 'T', 'D'] |
|
hydrophobicity_scores = { |
|
'A': 0.62, 'R': -2.53, 'N': -0.78, 'D': -0.90, 'C': 0.29, |
|
'Q': -0.85, 'E': -0.74, 'G': 0.48, 'H': -0.40, 'I': 1.38, |
|
'L': 1.06, 'K': -1.50, 'M': 0.64, 'F': 1.19, 'P': 0.12, |
|
'S': -0.18, 'T': -0.05, 'W': 0.81, 'Y': 0.26, 'V': 1.08 |
|
} |
|
|
|
hydrophobicity = 0 |
|
for residue in sequence: |
|
if residue in hydrophobic_residues: |
|
hydrophobicity += hydrophobicity_scores[residue] |
|
elif residue in hydrophilic_residues: |
|
hydrophobicity -= hydrophobicity_scores[residue] |
|
else: |
|
hydrophobicity -= 0.5 |
|
|
|
return hydrophobicity / len(sequence) |
|
|
|
def antigenicity(sequence, window_size=7): |
|
antigenicity_scores = [] |
|
for i in range(len(sequence) - window_size + 1): |
|
window = sequence[i:i+window_size] |
|
antigenicity_score = sum([1 if window[j] == 'A' or window[j] == 'G' else 0 for j in range(window_size)]) |
|
antigenicity_scores.append(antigenicity_score) |
|
return antigenicity_scores |
|
|
|
def emini_surface_accessibility(sequence, window_size=9): |
|
surface_accessibility_scores = [] |
|
for i in range(len(sequence) - window_size + 1): |
|
window = sequence[i:i+window_size] |
|
surface_accessibility_score = sum([1 if window[j] in ['S', 'T', 'N', 'Q'] else 0 for j in range(window_size)]) |
|
surface_accessibility_scores.append(surface_accessibility_score) |
|
return surface_accessibility_scores |
|
|
|
def perform_blastp(query_sequence, self): |
|
|
|
|
|
if self.blast_activate == True: |
|
start = time.time() |
|
try: |
|
result_handle = NCBIWWW.qblast("blastp", "nr", query_sequence) |
|
except Exception as e: |
|
|
|
print("BLASTp failed to connect") |
|
return "Skip because any error" |
|
|
|
|
|
print("BLASTp Starting...") |
|
blast_records = NCBIXML.parse(result_handle) |
|
for blast_record in blast_records: |
|
for alignment in blast_record.alignments: |
|
|
|
|
|
for hsp in alignment.hsps: |
|
similarity = (hsp.positives / hsp.align_length) * 100 |
|
if similarity > 80: |
|
return similarity |
|
print("BLASTp Finisihing...") |
|
end = time.time() |
|
time_blast = end-start |
|
print(f"Time for BLASTp : {time_blast} s") |
|
|
|
return "Non-similarity" |
|
else: |
|
return "Not Activated" |
|
|
|
|
|
    def predict_epitope(self):
        """Per-residue B-/T-cell epitope prediction over self.sequence.

        Returns a pair (pred_res1, pred_res2): pred_res1 holds per-residue
        labels and max class probabilities; pred_res2 holds the sequence
        grouped into consecutive epitope runs (via combine_lists /
        process_epitope).
        """
        seq = self.sequence
        # One feature row [position, GES hydrophobicity, len(seq)] per residue.
        seq_extra = ReVa.extraction_feature(seq)
        try:
            # One model call per residue for each classifier.
            pred_res_B = [self.Blabel[self.loaded_Bmodel.predict([seq_extra[i]])[0]] for i in range(len(seq_extra))]
            pred_res_T = [self.Tlabel[self.loaded_Tmodel.predict([seq_extra[i]])[0]] for i in range(len(seq_extra))]

            pred_proba_B = [np.max(self.loaded_Bmodel.predict_proba([seq_extra[i]])[0]) for i in range(len(seq_extra))]
            pred_proba_T = [np.max(self.loaded_Tmodel.predict_proba([seq_extra[i]])[0]) for i in range(len(seq_extra))]
        except:
            # NOTE(review): bare except — if prediction fails, pred_res_B etc.
            # are undefined and the lines below raise NameError.
            print("Error on epitope predict")

        # Group residues into epitope substrings and collapse label runs.
        seq_B = ReVa.combine_lists(seq, pred_res_B)
        pred_B = ReVa.process_epitope(pred_res_B)
        seq_T = ReVa.combine_lists(seq, pred_res_T)
        pred_T = ReVa.process_epitope(pred_res_T)

        pred_res1 = {
            'B': {'amino acid': ReVa.string_to_list(seq), 'predictions': pred_res_B, 'probabilities': pred_proba_B},
            'T': {'amino acid': ReVa.string_to_list(seq), 'predictions': pred_res_T, 'probabilities': pred_proba_T}
        }

        pred_res2 = {
            'B': {'seq': seq_B, 'label': pred_B},
            'T': {'seq': seq_T, 'label': pred_T}
        }

        return pred_res1, pred_res2
|
|
|
def predict_eval(self, Bpred, Tpred): |
|
BCell = ReVa.filter_epitope(Bpred)['seq'] |
|
TCell = ReVa.filter_epitope(Tpred)['seq'] |
|
|
|
Ballergen = [] |
|
BallergenProb = [] |
|
for i in range(len(BCell)): |
|
baller, ballerprob = ReVa.predict_label_and_probability_allergenicity(self, BCell[i]) |
|
Ballergen.append(baller) |
|
BallergenProb.append(ballerprob[0]) |
|
|
|
Tallergen = [] |
|
TallergenProb = [] |
|
for i in range(len(TCell)): |
|
baller, ballerprob = ReVa.predict_label_and_probability_allergenicity(self, TCell[i]) |
|
Tallergen.append(baller) |
|
TallergenProb.append(ballerprob[0]) |
|
|
|
Btoxin = [] |
|
BtoxinProb = [] |
|
Ttoxin = [] |
|
TtoxinProb = [] |
|
|
|
for i in range(len(BCell)): |
|
baller, ballerprob = ReVa.predict_label_and_probability_toxin(self, BCell[i]) |
|
Btoxin.append(baller) |
|
BtoxinProb.append(ballerprob[0]) |
|
|
|
for i in range(len(TCell)): |
|
baller, ballerprob = ReVa.predict_label_and_probability_toxin(self, TCell[i]) |
|
Ttoxin.append(baller) |
|
TtoxinProb.append(ballerprob[0]) |
|
|
|
BAntigen = [] |
|
BAntigenProb = [] |
|
TAntigen = [] |
|
TAntigenProb = [] |
|
|
|
for i in range(len(BCell)): |
|
baller, ballerprob = ReVa.predict_label_and_probability_antigenicity(self, BCell[i]) |
|
BAntigen.append(baller) |
|
BAntigenProb.append(ballerprob[0]) |
|
|
|
for i in range(len(TCell)): |
|
baller, ballerprob = ReVa.predict_label_and_probability_antigenicity(self, TCell[i]) |
|
TAntigen.append(baller) |
|
TAntigenProb.append(ballerprob[0]) |
|
|
|
Bhydrophobicity = [] |
|
Bkolaskar = [] |
|
Btangonkar = [] |
|
Bemini = [] |
|
Bsimilar = [] |
|
BPhysicochemical = [] |
|
|
|
for i in range(len(BCell)): |
|
Bhydrophobicity.append(ReVa.calculate_hydrophobicity(BCell[i])) |
|
Bkolaskar.append(ReVa.antigenicity(BCell[i])) |
|
Btangonkar.append(ReVa.antigenicity(BCell[i], window_size=5)) |
|
Bemini.append(ReVa.emini_surface_accessibility(BCell[i])) |
|
Bsimilar.append(ReVa.perform_blastp(BCell[i], self)) |
|
BPhysicochemical.append(ProtParamClone(BCell[i]).calculate()) |
|
|
|
Thydrophobicity = [] |
|
Tkolaskar = [] |
|
Ttangonkar = [] |
|
Temini = [] |
|
Tsimilar = [] |
|
TPhysicochemical = [] |
|
|
|
for i in range(len(TCell)): |
|
Thydrophobicity.append(ReVa.calculate_hydrophobicity(TCell[i])) |
|
Tkolaskar.append(ReVa.antigenicity(TCell[i])) |
|
Ttangonkar.append(ReVa.antigenicity(TCell[i], window_size=5)) |
|
Temini.append(ReVa.emini_surface_accessibility(TCell[i])) |
|
Tsimilar.append(ReVa.perform_blastp(TCell[i], self)) |
|
TPhysicochemical.append(ProtParamClone(TCell[i]).calculate()) |
|
|
|
classical_dock1B, classical_dock1T = ClassicalDocking(BCell, TCell, self.base_path, self.target_path, self.n_receptor).ForceField1() |
|
classical_dock1BAdjuvant, classical_dock1TAdjuvant = ClassicalDockingWithAdjuvant(BCell, TCell, self.base_path, self.target_path, self.n_receptor, self.n_adjuvant).ForceField1() |
|
|
|
dock1B, dock1T = MLDocking(BCell, TCell, self.base_path, self.target_path, self.n_receptor).MLDock1() |
|
dock1BAdjuvant, dock1TAdjuvant = MLDockingWithAdjuvant(BCell, TCell, self.base_path, self.target_path, self.n_receptor, self.n_adjuvant).MLDock1() |
|
|
|
pred = { |
|
'seq': { |
|
'B':BCell, |
|
'T':TCell |
|
}, |
|
|
|
'allergenicity' : { |
|
'B' : Ballergen, |
|
'T' : Tallergen, |
|
'B_proba' : BallergenProb, |
|
'T_proba' : TallergenProb |
|
}, |
|
|
|
'toxin' : { |
|
'B' : Btoxin, |
|
'T' : Ttoxin, |
|
'B_proba' : BtoxinProb, |
|
'T_proba' : TtoxinProb |
|
}, |
|
|
|
'antigenicity' : { |
|
'B' : BAntigen, |
|
'T' : TAntigen, |
|
'B_proba' : BAntigenProb, |
|
'T_proba' : TAntigenProb |
|
}, |
|
|
|
'hydrophobicity' : { |
|
'B' : Bhydrophobicity, |
|
'T' : Thydrophobicity |
|
}, |
|
|
|
'kolaskar' : { |
|
'B' : Bkolaskar, |
|
'T' : Tkolaskar |
|
}, |
|
|
|
'tangonkar' : { |
|
'B' : Btangonkar, |
|
'T' : Ttangonkar |
|
}, |
|
|
|
'emini' : { |
|
'B' : Bemini, |
|
'T' : Temini |
|
}, |
|
|
|
'similarity' : { |
|
'B' : Bsimilar, |
|
'T' : Tsimilar |
|
}, |
|
|
|
'physicochemical' : { |
|
'B' : BPhysicochemical, |
|
'T' : TPhysicochemical |
|
}, |
|
|
|
'classical dock(Force Field)' : { |
|
'B' : classical_dock1B, |
|
'T' : classical_dock1T |
|
}, |
|
|
|
'classical dock(Force Field) With Adjuvant' : { |
|
'B' : classical_dock1BAdjuvant, |
|
'T' : classical_dock1TAdjuvant |
|
}, |
|
|
|
'Machine Learning based dock' : { |
|
'B' : dock1B, |
|
'T' : dock1T |
|
}, |
|
|
|
'Machine Learning based dock With Adjuvant' : { |
|
'B' : dock1BAdjuvant, |
|
'T' : dock1TAdjuvant |
|
}, |
|
|
|
} |
|
|
|
|
|
|
|
sliced_pred = {key: pred[key] for key in list(pred.keys())[:9]} |
|
|
|
B_data = {key: sliced_pred[key]['B'] for key in sliced_pred.keys() if key != 'seq'} |
|
T_data = {key: sliced_pred[key]['T'] for key in sliced_pred.keys() if key != 'seq'} |
|
|
|
|
|
B_result = pd.DataFrame(B_data) |
|
T_result = pd.DataFrame(T_data) |
|
|
|
print("DataFrames exported to B_result.xlsx and T_result.xlsx") |
|
|
|
file_path = f'{self.target_path}/{generate_filename_with_timestamp_and_random()}_eval_res_quantum.json' |
|
|
|
|
|
B_result.to_csv(f"{self.target_path}/B_result.csv", index=False) |
|
T_result.to_csv(f"{self.target_path}/T_result.csv", index=False) |
|
|
|
try: |
|
url = self.llm_url |
|
B_res_review = requests.post(url, files = {"uploaded_file": open(f'{self.target_path}/B_result.csv', 'rb')}, verify=False, timeout=6000) |
|
T_res_review = requests.post(url, files = {"uploaded_file": open(f'{self.target_path}/T_result.csv', 'rb')}, verify=False, timeout=6000) |
|
|
|
|
|
|
|
|
|
|
|
|
|
export_string_to_text_file(B_res_review.text, f'{self.target_path}/B_res_review.txt') |
|
export_string_to_text_file(T_res_review.text, f'{self.target_path}/T_res_review.txt') |
|
except: |
|
pass |
|
|
|
try: |
|
alphafold_res_dir = f'{self.target_path}/Alphafold Modelling Result' |
|
|
|
create_folder(alphafold_res_dir) |
|
create_folder(f'{alphafold_res_dir}/B') |
|
create_folder(f'{alphafold_res_dir}/T') |
|
|
|
url = self.alphafold_url |
|
if url[-1] == '/': |
|
pass |
|
else: |
|
url += '/' |
|
|
|
for epitope_type in list(['B', 'T']): |
|
for i, seq in enumerate(pred['seq'][epitope_type]): |
|
|
|
response = requests.get(url+ "?protein_sequence="+seq+"&jobname="+f"{epitope_type}_{i}_3D_{seq}", verify=False, timeout=6000) |
|
if response.status_code == 200: |
|
response_res = json.loads(response.text) |
|
print(response_res) |
|
try: |
|
download_file(url + response_res['result'],f"{alphafold_res_dir}/{epitope_type}/{epitope_type}_{i}_3D_{seq}.zip") |
|
except: |
|
print("Error/gagal download") |
|
else: |
|
print("Failed Protein Modelling") |
|
continue |
|
|
|
except: |
|
pass |
|
|
|
|
|
with open(file_path, 'w') as json_file: |
|
json.dump(str(pred), json_file) |
|
|
|
|
|
return pred |
|
|
|
def predict(self): |
|
print("Starting Predict Epitope...") |
|
pred1, pred2 = self.predict_epitope() |
|
print("Starting Predict Evalution For Epitope...") |
|
pred_eval = self.predict_eval(pred2['B'], pred2['T']) |
|
print("Finished Predict") |
|
return pred1, pred2, pred_eval |
|
|
|
class QReVa: |
|
def preprocessing_begin(seq): |
|
seq = str(seq).upper() |
|
delete_char = "BJOUXZ\n\t 1234567890*&^%$#@!~()[];:',.<><?/" |
|
for i in range(len(delete_char)): |
|
seq = seq.replace(delete_char[i],'') |
|
return seq |
|
|
|
|
|
    def __init__(self, sequence, base_path, target_path, n_receptor, n_adjuvant, blast_activate=False, qibm_api="", backend_type="ibmq_qasm_simulator", llm_url="", alphafold_url=""):
        """Quantum variant of ReVa: same pipeline layout, but VQC models and
        *_quantum label mappings.

        Load failures are printed rather than raised; a failed label load
        leaves `label_dict` undefined and the following mapping line raises
        NameError (same hazard as ReVa.__init__).
        """
        self.sequence = QReVa.preprocessing_begin(sequence)
        self.base_path = base_path
        self.blast_activate = blast_activate  # gate for remote BLASTp similarity checks
        self.target_path = target_path
        self.n_receptor = n_receptor
        self.n_adjuvant = n_adjuvant
        self.qibm_api = qibm_api              # IBM Quantum API token (stored; unused in visible code)
        self.backend_type = backend_type      # backend name (stored; unused in visible code)
        self.llm_url = llm_url
        self.alphafold_url = alphafold_url
        self.sampler = None                   # remote sampler slot; None means local execution
        create_folder(self.target_path)

        # 20 standard amino acids; basis of the one-hot encoding.
        self.alphabet = 'ACDEFGHIKLMNPQRSTVWY'
        self.num_features = len(self.alphabet)

        # Quantum B-cell epitope classifier (serialized VQC).
        try:
            model_path = 'B_vqc_model'
            self.loaded_Bmodel = QReVa.quantum_load_model(self.base_path, model_path, self)
        except Exception as e:
            print(f"Error on Load Model Epitope B : {e}")

        # Quantum T-cell epitope classifier.
        try:
            model_path = 'T_vqc_model'
            self.loaded_Tmodel = QReVa.quantum_load_model(self.base_path, model_path, self)
        except:
            print("Error on load model Epitope T")

        try:
            with open(os.path.join(label_dir, 'allergenicity_label_mapping_quantum.json'), 'r') as f:
                label_dict = json.load(f)
        except:
            print("Error on load allergenicity label")

        self.reverse_label_mapping_allergen = {v: k for k, v in label_dict.items()}
        self.seq_length_allergen = 4857  # fixed input length (allergenicity)

        try:
            with open(os.path.join(label_dir,'toxin_label_mapping_quantum.json'), 'r') as f:
                label_dict = json.load(f)
        except:
            print("Error on load toxin label")

        self.reverse_label_mapping_toxin = {v: k for k, v in label_dict.items()}
        self.seq_length_toxin = 35  # fixed input length (toxin)

        try:
            with open(os.path.join(label_dir, 'antigenicity_label_mapping_quantum.json'), 'r') as f:
                label_dict = json.load(f)
        except:
            print("Error on load antigenicity label")

        self.reverse_label_mapping_antigen = {v: k for k, v in label_dict.items()}
        self.seq_length_antigen = 83  # fixed input length (antigenicity)

        # Index -> label mappings for the two quantum epitope classifiers.
        try:
            with open(os.path.join(label_dir,'BPepTree_label_quantum.json'), 'r') as f:
                label_dict = json.load(f)
                self.Blabel = QReVa.invert_dict(label_dict)
        except:
            print("Error on load Epitope B label")

        try:
            with open(os.path.join(label_dir,'TPepTree_label_quantum.json'), 'r') as f:
                label_dict = json.load(f)
                self.Tlabel = QReVa.invert_dict(label_dict)
        except:
            print("Error on load Epitope T label")
|
|
|
|
|
def quantum_load_model(base_path, model_path, self): |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model = VQC.load(os.path.join(qmodel_dir, model_path)) |
|
|
|
return model |
|
|
|
def combine_lists(list1, list2): |
|
result = [] |
|
current_group = "" |
|
|
|
for i in range(len(list1)): |
|
if list2[i] == 'E': |
|
current_group += list1[i] |
|
else: |
|
if current_group: |
|
result.append(current_group) |
|
current_group = "" |
|
result.append(list1[i]) |
|
|
|
if current_group: |
|
result.append(current_group) |
|
|
|
return result |
|
|
|
    def q_extraction_feature(seq):
        """Two global features for the quantum models:
        (amino-acid center of mass, molecular weight of the SMILES form).

        NOTE(review): relies on a `molecular_function` helper that is not
        defined in this file — presumably provided elsewhere in the project;
        confirm before reuse.
        """
        com = molecular_function.calculate_amino_acid_center_of_mass(str(seq))
        weight = molecular_function.calculate_molecular_weight(str(molecular_function.seq_to_smiles(seq)))

        return com, weight
|
|
|
def janin_hydrophobicity_scale(aa): |
|
scale = { |
|
'A': 0.42, 'C': 0.82, 'D': -1.23, 'E': -2.02, 'F': 1.37, |
|
'G': 0.58, 'H': -0.73, 'I': 1.38, 'K': -1.05, 'L': 1.06, |
|
'M': 0.64, 'N': -0.6, 'P': 0.12, 'Q': -0.22, 'R': -0.84, |
|
'S': -0.04, 'T': 0.26, 'V': 1.08, 'W': 1.78, 'Y': 0.79 |
|
} |
|
return scale.get(aa, 0.0) |
|
|
|
|
|
def get_position(aa): |
|
pos = [i+1 for i in range(0, len(aa))] |
|
return pos |
|
|
|
def one_hot_encoding(self, sequence): |
|
encoding = [] |
|
for char in sequence: |
|
vector = [0] * self.num_features |
|
if char in self.alphabet: |
|
index = self.alphabet.index(char) |
|
vector[index] = 1 |
|
encoding.append(vector) |
|
return encoding |
|
|
|
def extraction_feature(aa): |
|
pos = QReVa.get_position(aa) |
|
scale = [QReVa.janin_hydrophobicity_scale(aa[i]) for i in range(len(aa))] |
|
|
|
res = [[pos[i], scale[i], len(aa)] for i in range(len(pos))] |
|
|
|
return res |
|
|
|
    def predict_label_and_probability_allergenicity(self, sequence):
        """Quantum (VQC) allergenicity prediction for one peptide.

        Despite the name, returns only the mapped label (no probability),
        or None implicitly if prediction fails.
        """
        try:
            model_path = 'allergen_vqc_model'
            model = QReVa.quantum_load_model(model_dir, model_path, self)
        except:
            # NOTE(review): bare except — `model` undefined below on failure.
            print("Error on load model allergenicity")

        try:
            # Single (center-of-mass, molecular-weight) feature row.
            feature = [QReVa.q_extraction_feature(sequence)]
            prediction = int(model.predict(feature))
            print(f"Prediction of allergen : {prediction}")
            prediction = self.reverse_label_mapping_allergen[prediction]

            return prediction
        except:
            print("Error predict allergenicity")
|
|
|
|
|
    def predict_label_and_probability_toxin(self, sequence):
        """Quantum (VQC) toxicity prediction for one peptide.

        Returns only the mapped label, or None implicitly on failure.
        """
        try:
            # NOTE(review): loads 'allergen_vqc_model' here as well — this
            # looks like a copy-paste from the allergenicity predictor;
            # confirm whether a dedicated toxin VQC model was intended.
            model_path = 'allergen_vqc_model'
            model = QReVa.quantum_load_model(model_dir, model_path, self)
        except:
            print("Error on load model toxin")

        try:
            feature = [QReVa.q_extraction_feature(sequence)]
            prediction = int(model.predict(feature))
            prediction = self.reverse_label_mapping_toxin[prediction]

            return prediction
        except:
            print("Error toxin predict")
|
|
|
|
|
    def predict_label_and_probability_antigenicity(self, sequence):
        """Quantum (VQC) antigenicity prediction for one peptide.

        Returns only the mapped label, or None implicitly on failure.
        """
        try:
            model_path = 'antigen_vqc_model'
            model = QReVa.quantum_load_model(model_dir, model_path, self)
        except:
            # NOTE(review): bare except — `model` undefined below on failure.
            print("Error on load model antigenicity")

        try:
            feature = [QReVa.q_extraction_feature(sequence)]
            prediction = int(model.predict(feature))
            prediction = self.reverse_label_mapping_antigen[prediction]

            return prediction
        except:
            print("Error antigenicity predict")
|
|
|
|
|
def invert_dict(dictionary): |
|
inverted_dict = {value: key for key, value in dictionary.items()} |
|
return inverted_dict |
|
|
|
def process_epitope(input_list): |
|
output_list = [] |
|
current_group = [] |
|
|
|
for item in input_list: |
|
if item == 'E': |
|
current_group.append(item) |
|
else: |
|
if current_group: |
|
output_list.append(''.join(current_group)) |
|
current_group = [] |
|
output_list.append(item) |
|
|
|
if current_group: |
|
output_list.append(''.join(current_group)) |
|
|
|
return output_list |
|
|
|
def filter_epitope(data): |
|
filtered_seq = [] |
|
filtered_label = [] |
|
|
|
for i in range(len(data['seq'])): |
|
if data['label'][i] != '.': |
|
filtered_seq.append(data['seq'][i]) |
|
filtered_label.append(data['label'][i]) |
|
|
|
filtered_data = {'seq': filtered_seq, 'label': filtered_label} |
|
return filtered_data |
|
|
|
def string_to_list(input_string): |
|
return list(input_string) |
|
|
|
def calculate_hydrophobicity(sequence): |
|
hydrophobic_residues = ['A', 'I', 'L', 'M', 'F', 'V', 'W', 'Y'] |
|
hydrophilic_residues = ['R', 'N', 'C', 'Q', 'E', 'G', 'H', 'K', 'S', 'T', 'D'] |
|
hydrophobicity_scores = { |
|
'A': 0.62, 'R': -2.53, 'N': -0.78, 'D': -0.90, 'C': 0.29, |
|
'Q': -0.85, 'E': -0.74, 'G': 0.48, 'H': -0.40, 'I': 1.38, |
|
'L': 1.06, 'K': -1.50, 'M': 0.64, 'F': 1.19, 'P': 0.12, |
|
'S': -0.18, 'T': -0.05, 'W': 0.81, 'Y': 0.26, 'V': 1.08 |
|
} |
|
|
|
hydrophobicity = 0 |
|
for residue in sequence: |
|
if residue in hydrophobic_residues: |
|
hydrophobicity += hydrophobicity_scores[residue] |
|
elif residue in hydrophilic_residues: |
|
hydrophobicity -= hydrophobicity_scores[residue] |
|
else: |
|
hydrophobicity -= 0.5 |
|
|
|
return hydrophobicity / len(sequence) |
|
|
|
def antigenicity(sequence, window_size=7): |
|
antigenicity_scores = [] |
|
for i in range(len(sequence) - window_size + 1): |
|
window = sequence[i:i+window_size] |
|
antigenicity_score = sum([1 if window[j] == 'A' or window[j] == 'G' else 0 for j in range(window_size)]) |
|
antigenicity_scores.append(antigenicity_score) |
|
return antigenicity_scores |
|
|
|
def emini_surface_accessibility(sequence, window_size=9): |
|
surface_accessibility_scores = [] |
|
for i in range(len(sequence) - window_size + 1): |
|
window = sequence[i:i+window_size] |
|
surface_accessibility_score = sum([1 if window[j] in ['S', 'T', 'N', 'Q'] else 0 for j in range(window_size)]) |
|
surface_accessibility_scores.append(surface_accessibility_score) |
|
return surface_accessibility_scores |
|
|
|
def perform_blastp(query_sequence, self): |
|
|
|
|
|
if self.blast_activate == True: |
|
start = time.time() |
|
try: |
|
result_handle = NCBIWWW.qblast("blastp", "nr", query_sequence) |
|
except Exception as e: |
|
|
|
print("BLASTp failed to connect") |
|
return "Skip because any error" |
|
|
|
|
|
print("BLASTp Starting..") |
|
blast_records = NCBIXML.parse(result_handle) |
|
for blast_record in blast_records: |
|
for alignment in blast_record.alignments: |
|
|
|
|
|
for hsp in alignment.hsps: |
|
similarity = (hsp.positives / hsp.align_length) * 100 |
|
if similarity > 80: |
|
return similarity |
|
print("BLASTp Finisihing..") |
|
end = time.time() |
|
time_blast = end-start |
|
print(f"Time for BLASTp : {time_blast} s") |
|
|
|
return "Non-similarity" |
|
else: |
|
return "Not Activated" |
|
|
|
|
|
def predict_epitope(self): |
|
seq = self.sequence |
|
seq_extra = QReVa.extraction_feature(seq) |
|
|
|
|
|
|
|
|
|
|
|
|
|
print("pass test") |
|
|
|
pred_res_B = [self.Blabel[int(self.loaded_Bmodel.predict([seq_extra[i]]))] for i in range(len(seq_extra))] |
|
print("Prediction B epitope pass") |
|
pred_res_T = [self.Tlabel[int(self.loaded_Tmodel.predict([seq_extra[i]]))] for i in range(len(seq_extra))] |
|
print("Prediction T epitope pass") |
|
|
|
seq_B = QReVa.combine_lists(seq, pred_res_B) |
|
pred_B = QReVa.process_epitope(pred_res_B) |
|
seq_T = QReVa.combine_lists(seq, pred_res_T) |
|
pred_T = QReVa.process_epitope(pred_res_T) |
|
|
|
|
|
pred_res1 = { |
|
'B': {'amino acid': QReVa.string_to_list(seq), 'predictions': pred_res_B}, |
|
'T': {'amino acid': QReVa.string_to_list(seq), 'predictions': pred_res_T} |
|
} |
|
|
|
pred_res2 = { |
|
'B': {'seq': seq_B, 'label': pred_B}, |
|
'T': {'seq': seq_T, 'label': pred_T} |
|
} |
|
|
|
return pred_res1, pred_res2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
    def predict_eval(self, Bpred, Tpred):
        """Evaluate the filtered B/T epitopes on safety, antigenicity,
        physicochemical and docking criteria; export reports and return the
        aggregate result dict.

        Side effects: writes B_result.csv / T_result.csv, two review .txt
        files (best effort), AlphaFold model zips (best effort), several
        .xlsx docking reports (via the docking classes) and one JSON dump
        into self.target_path.
        """
        # Keep only windows labelled as epitopes; 'seq' holds the peptide strings.
        BCell = QReVa.filter_epitope(Bpred)['seq']
        TCell = QReVa.filter_epitope(Tpred)['seq']

        # --- per-epitope classifier screens: allergenicity ---
        Ballergen = []
        for i in range(len(BCell)):
            baller = QReVa.predict_label_and_probability_allergenicity(self, BCell[i])
            Ballergen.append(baller)

        Tallergen = []
        for i in range(len(TCell)):
            baller = QReVa.predict_label_and_probability_allergenicity(self, TCell[i])
            Tallergen.append(baller)

        # --- toxicity screen ---
        Btoxin = []
        Ttoxin = []

        for i in range(len(BCell)):
            baller = QReVa.predict_label_and_probability_toxin(self, BCell[i])
            Btoxin.append(baller)

        for i in range(len(TCell)):
            baller = QReVa.predict_label_and_probability_toxin(self, TCell[i])
            Ttoxin.append(baller)

        # --- antigenicity screen ---
        BAntigen = []
        TAntigen = []

        for i in range(len(BCell)):
            baller = QReVa.predict_label_and_probability_antigenicity(self, BCell[i])
            BAntigen.append(baller)

        for i in range(len(TCell)):
            baller = QReVa.predict_label_and_probability_antigenicity(self, TCell[i])
            TAntigen.append(baller)

        # --- sequence-derived descriptors for B-cell epitopes ---
        # kolaskar/tangonkar both reuse QReVa.antigenicity with different windows.
        Bhydrophobicity = []
        Bkolaskar = []
        Btangonkar = []
        Bemini = []
        Bsimilar = []
        BPhysicochemical = []

        for i in range(len(BCell)):
            Bhydrophobicity.append(QReVa.calculate_hydrophobicity(BCell[i]))
            Bkolaskar.append(QReVa.antigenicity(BCell[i]))
            Btangonkar.append(QReVa.antigenicity(BCell[i], window_size=5))
            Bemini.append(QReVa.emini_surface_accessibility(BCell[i]))
            Bsimilar.append(QReVa.perform_blastp(BCell[i], self))
            BPhysicochemical.append(ProtParamClone(BCell[i]).calculate())

        # --- same descriptors for T-cell epitopes ---
        Thydrophobicity = []
        Tkolaskar = []
        Ttangonkar = []
        Temini = []
        Tsimilar = []
        TPhysicochemical = []

        for i in range(len(TCell)):
            Thydrophobicity.append(QReVa.calculate_hydrophobicity(TCell[i]))
            Tkolaskar.append(QReVa.antigenicity(TCell[i]))
            Ttangonkar.append(QReVa.antigenicity(TCell[i], window_size=5))
            Temini.append(QReVa.emini_surface_accessibility(TCell[i]))
            Tsimilar.append(QReVa.perform_blastp(TCell[i], self))
            TPhysicochemical.append(ProtParamClone(TCell[i]).calculate())

        # --- docking: classical force field and (Q)ML scoring, each with and
        # without adjuvants. Each call also writes its own .xlsx report.
        classical_dock1B, classical_dock1T = ClassicalDocking(BCell, TCell, self.base_path, self.target_path, self.n_receptor).ForceField1()

        classical_dock1BAdjuvant, classical_dock1TAdjuvant = ClassicalDockingWithAdjuvant(BCell, TCell, self.base_path, self.target_path, self.n_receptor, self.n_adjuvant).ForceField1()

        dock1B, dock1T = QMLDocking(BCell, TCell, self.base_path, self.target_path, self.n_receptor, self.sampler).MLDock1()
        dock1BAdjuvant, dock1TAdjuvant = QMLDockingWithAdjuvant(BCell, TCell, self.base_path, self.target_path, self.n_receptor, self.n_adjuvant, self.sampler).MLDock1()

        # Aggregate everything; insertion order matters (see sliced_pred below).
        pred = {
            'seq': {
                'B':BCell,
                'T':TCell
            },

            'allergenicity' : {
                'B' : Ballergen,
                'T' : Tallergen
            },

            'toxin' : {
                'B' : Btoxin,
                'T' : Ttoxin
            },

            'antigenicity' : {
                'B' : BAntigen,
                'T' : TAntigen
            },

            'hydrophobicity' : {
                'B' : Bhydrophobicity,
                'T' : Thydrophobicity
            },

            'kolaskar' : {
                'B' : Bkolaskar,
                'T' : Tkolaskar
            },

            'tangonkar' : {
                'B' : Btangonkar,
                'T' : Ttangonkar
            },

            'emini' : {
                'B' : Bemini,
                'T' : Temini
            },

            'similarity' : {
                'B' : Bsimilar,
                'T' : Tsimilar
            },

            'physicochemical' : {
                'B' : BPhysicochemical,
                'T' : TPhysicochemical
            },

            'classical dock(Force Field)' : {
                'B' : classical_dock1B,
                'T' : classical_dock1T
            },

            'classical dock(Force Field) With Adjuvant' : {
                'B' : classical_dock1BAdjuvant,
                'T' : classical_dock1TAdjuvant
            },

            'Machine Learning based dock' : {
                'B' : dock1B,
                'T' : dock1T
            },

            'Machine Learning based dock With Adjuvant' : {
                'B' : dock1BAdjuvant,
                'T' : dock1TAdjuvant
            },

        }

        # First 9 keys ('seq' .. 'similarity') form the tabular export; the
        # docking/physicochemical entries are not table-shaped.
        sliced_pred = {key: pred[key] for key in list(pred.keys())[:9]}

        B_data = {key: sliced_pred[key]['B'] for key in sliced_pred.keys() if key != 'seq'}
        T_data = {key: sliced_pred[key]['T'] for key in sliced_pred.keys() if key != 'seq'}

        B_result = pd.DataFrame(B_data)
        T_result = pd.DataFrame(T_data)

        # NOTE(review): message mentions .xlsx but the export below is CSV.
        print("DataFrames exported to B_result.xlsx and T_result.xlsx")

        file_path = f'{self.target_path}/{generate_filename_with_timestamp_and_random()}_eval_res_quantum.json'

        B_result.to_csv(f"{self.target_path}/B_result.csv", index=False)
        T_result.to_csv(f"{self.target_path}/T_result.csv", index=False)

        # Best-effort: send both CSVs to an LLM review service and save its
        # textual responses.
        # NOTE(review): bare except silently swallows all errors, verify=False
        # disables TLS verification, and the open() file handles are never
        # closed — confirm whether this is acceptable here.
        try:
            url = self.llm_url
            B_res_review = requests.post(url, files = {"uploaded_file": open(f'{self.target_path}/B_result.csv', 'rb')}, verify=False, timeout=6000)
            T_res_review = requests.post(url, files = {"uploaded_file": open(f'{self.target_path}/T_result.csv', 'rb')}, verify=False, timeout=6000)

            export_string_to_text_file(B_res_review.text, f'{self.target_path}/B_res_review.txt')
            export_string_to_text_file(T_res_review.text, f'{self.target_path}/T_res_review.txt')
        except:
            pass

        # Best-effort: request a 3D model of every epitope from an AlphaFold
        # service and download the result zips. Same bare-except caveat.
        try:
            alphafold_res_dir = f'{self.target_path}/Alphafold Modelling Result'

            create_folder(alphafold_res_dir)
            create_folder(f'{alphafold_res_dir}/B')
            create_folder(f'{alphafold_res_dir}/T')

            # Ensure the service URL ends with a slash before appending queries.
            url = self.alphafold_url
            if url[-1] == '/':
                pass
            else:
                url += '/'

            for epitope_type in list(['B', 'T']):
                for i, seq in enumerate(pred['seq'][epitope_type]):

                    response = requests.get(url+ "?protein_sequence="+seq+"&jobname="+f"{epitope_type}_{i}_3D_{seq}", verify=False, timeout=6000)
                    if response.status_code == 200:
                        response_res = json.loads(response.text)
                        print(response_res)
                        try:
                            download_file(url + response_res['result'],f"{alphafold_res_dir}/{epitope_type}/{epitope_type}_{i}_3D_{seq}.zip")
                        except:
                            print("Error/gagal download")
                    else:
                        print("Failed Protein Modelling")
                        continue

        except:
            pass

        # NOTE(review): this stores repr(pred) as a single JSON string, not a
        # structured JSON document — confirm downstream consumers expect that.
        with open(file_path, 'w') as json_file:
            json.dump(str(pred), json_file)

        return pred
|
|
|
def predict(self): |
|
print("Starting Predict Epitope..") |
|
pred1, pred2 = self.predict_epitope() |
|
print("Starting Predict Evalution For Epitope..") |
|
pred_eval = self.predict_eval(pred2['B'], pred2['T']) |
|
print("Finished Predict") |
|
return pred1, pred2, pred_eval |
|
|
|
class ProtParamClone:
    """Minimal ProtParam-style calculator of protein physicochemical properties.

    Most methods are written without ``self`` (or with ``self`` in a
    non-standard position) and are invoked unbound via the class, e.g.
    ``ProtParamClone.calculate_gravy(seq, obj)``; the signatures are kept
    as-is for call-site compatibility.
    """

    def preprocessing_begin(seq):
        """Uppercase *seq* and strip non-standard residues, whitespace,
        digits and punctuation, leaving only the 20 standard one-letter codes.
        """
        seq = str(seq).upper()
        delete_char = "BJOUXZ\n\t 1234567890*&^%$#@!~()[];:',.<><?/"
        # Single C-level pass instead of chained replace() calls.
        return seq.translate(str.maketrans('', '', delete_char))

    def __init__(self, seq):
        # Cleaned sequence; all calculations below operate on this.
        self.seq = ProtParamClone.preprocessing_begin(seq)

        # Kyte-Doolittle hydropathy values per residue.
        self.hydropathy_values = {
            'A': 1.800,
            'R': -4.500,
            'N': -3.500,
            'D': -3.500,
            'C': 2.500,
            'E': -3.500,
            'Q': -3.500,
            'G': -0.400,
            'H': -3.200,
            'I': 4.500,
            'L': 3.800,
            'K': -3.900,
            'M': 1.900,
            'F': 2.800,
            'P': -1.600,
            'S': -0.800,
            'T': -0.700,
            'W': -0.900,
            'Y': -1.300,
            'V': 4.200
        }

        # Molar extinction coefficients (per residue contribution at 280 nm).
        self.extinction_coefficients = {
            'Tyr': 1490,
            'Trp': 5500,
            'Cystine': 125
        }

        # N-end-rule half-life (hours) by N-terminal residue and organism.
        self.half_life_values = {
            'A': {'Mammalian': 4.4, 'Yeast': 20, 'E. coli': 10},
            'R': {'Mammalian': 1, 'Yeast':2, 'E. coli':2},
            'N': {'Mammalian': 1.4, 'Yeast':3, 'E. coli': 10},
            'D': {'Mammalian': 1.1, 'Yeast':3, 'E. coli': 10},
            'C': {'Mammalian': 1.2, 'Yeast': 20,'E. coli': 10},
            'E': {'Mammalian': 0.8, 'Yeast':10, 'E. coli': 10},
            'Q': {'Mammalian': 1, 'Yeast':30, 'E. coli': 10},
            'G': {'Mammalian': 30, 'Yeast': 20, 'E. coli': 10},
            'H': {'Mammalian': 3.5, 'Yeast':10, 'E. coli': 10},
            'I': {'Mammalian': 20, 'Yeast':30, 'E. coli': 10},
            'L': {'Mammalian': 5.5, 'Yeast':3, 'E. coli':2},
            'K': {'Mammalian': 1.3, 'Yeast':3, 'E. coli':2},
            'M': {'Mammalian': 30, 'Yeast': 20,'E. coli': 10},
            'F': {'Mammalian': 1.1, 'Yeast':3, 'E. coli':2},
            'P': {'Mammalian': 20, 'Yeast': 20,'E. coli':0},
            'S': {'Mammalian': 1.9, 'Yeast': 20,'E. coli': 10},
            'T': {'Mammalian': 7.2, 'Yeast': 20,'E. coli': 10},
            'W': {'Mammalian': 2.8, 'Yeast':3, 'E. coli':2},
            'Y': {'Mammalian': 2.8, 'Yeast':10, 'E. coli':2},
            'V': {'Mammalian': 100, 'Yeast': 20, 'E. coli': 10}
        }

    def calculate_instability_index(sequence, self):
        """Instability index via Biopython; ``self`` is unused but kept for
        call-site compatibility (called as calculate_instability_index(seq, obj)).
        """
        X = ProteinAnalysis(sequence)
        return X.instability_index()

    def calculate_aliphatic_index(sequence):
        """Aliphatic index (Ikai): mole% A + 2.9*V + 3.9*(I + L)."""
        X_Ala = (sequence.count('A') / len(sequence)) * 100
        X_Val = (sequence.count('V') / len(sequence)) * 100
        X_Ile = (sequence.count('I') / len(sequence)) * 100
        X_Leu = (sequence.count('L') / len(sequence)) * 100
        return X_Ala + 2.9 * X_Val + 3.9 * (X_Ile + X_Leu)

    def calculate_gravy(sequence, self):
        """Grand average of hydropathy: mean Kyte-Doolittle value over the
        sequence; residues missing from the table contribute 0.
        """
        return sum(self.hydropathy_values.get(aa, 0) for aa in sequence) / len(sequence)

    def calculate_extinction_coefficient(sequence, self):
        """Extinction coefficient from Tyr/Trp/Cys counts.

        NOTE(review): every Cys is weighted with the cystine coefficient;
        the standard formula counts cystine *pairs* — confirm intent.
        """
        num_Tyr = sequence.count('Y')
        num_Trp = sequence.count('W')
        num_Cystine = sequence.count('C')
        return (num_Tyr * self.extinction_coefficients['Tyr'] +
                num_Trp * self.extinction_coefficients['Trp'] +
                num_Cystine * self.extinction_coefficients['Cystine'])

    def predict_half_life(self, sequence, organism='Mammalian'):
        """N-end-rule half-life lookup by the N-terminal residue; 'N/A' when
        the residue or organism is unknown.
        """
        n_terminal_residue = sequence[0]
        return self.half_life_values.get(n_terminal_residue, {}).get(organism, 'N/A')

    def calculate_atom_composition(molecule):
        """Count atoms of an RDKit molecule by element symbol.

        NOTE(review): RDKit's GetAtoms() typically omits implicit hydrogens,
        so the 'H' count may be 0 — confirm before relying on it.
        """
        atom_composition = {}
        for atom in molecule.GetAtoms():
            symbol = atom.GetSymbol()
            atom_composition[symbol] = atom_composition.get(symbol, 0) + 1
        return atom_composition

    def calculate_HNOSt_composition(molecule):
        """Return (C, H, N, O, S) atom counts of an RDKit molecule."""
        composition = ProtParamClone.calculate_atom_composition(molecule)
        return (composition.get('C', 0),
                composition.get('H', 0),
                composition.get('N', 0),
                composition.get('O', 0),
                composition.get('S', 0))

    def calculate_theoretical_pI(protein_sequence):
        """Theoretical isoelectric point via Biopython ProteinAnalysis.

        (Unused by calculate(), which uses Bio.SeqUtils.IsoelectricPoint
        directly; kept for API compatibility.)
        """
        X = ProteinAnalysis(protein_sequence)
        return X.isoelectric_point()

    def MolWeight(self):
        """Molecular weight of the peptide built from self.seq via RDKit."""
        mol = Chem.MolFromSequence(self.seq)
        return Descriptors.MolWt(mol)

    def calculate(self):
        """Compute the full physicochemical profile of self.seq.

        Returns a dict of descriptors; individual entries are None when their
        sub-computation failed. Returns None (after printing) only when an
        unexpected error aborts the whole profile.
        """
        try:
            # Pre-seed every output so a failed sub-computation leaves None in
            # the result instead of raising NameError when the dict is built
            # (the previous version crashed into the outer handler that way).
            instability = aliphatic = calculate_gravy = extinction = None
            half_life = formula = theoretical_pI = m_weight = None
            Cn = Hn = Nn = On = Sn = None

            try:
                instability = ProtParamClone.calculate_instability_index(self.seq, self)
            except Exception:
                print("error instability")
            aliphatic = ProtParamClone.calculate_aliphatic_index(self.seq)
            try:
                calculate_gravy = ProtParamClone.calculate_gravy(self.seq, self)
            except Exception:
                print("error gravy")
            extinction = ProtParamClone.calculate_extinction_coefficient(self.seq, self)
            try:
                half_life = ProtParamClone.predict_half_life(self, sequence=self.seq)
            except Exception:
                print("error half life")
            mol = Chem.MolFromSequence(self.seq)
            try:
                formula = rdMolDescriptors.CalcMolFormula(mol)
            except Exception:
                print("error formula")
            Cn, Hn, Nn, On, Sn = ProtParamClone.calculate_HNOSt_composition(mol)
            try:
                theoretical_pI = IP.IsoelectricPoint(self.seq).pi()
            except Exception:
                print("error theoretical_pI")
            m_weight = ProtParamClone.MolWeight(self)

            res = {
                'seq' : self.seq,
                'instability' : instability,
                'aliphatic' : aliphatic,
                'gravy' : calculate_gravy,
                'extinction' : extinction,
                'half_life' : half_life,
                'formula' : formula,
                'C' : Cn,
                'H' : Hn,
                'N' : Nn,
                'O' : On,
                'S' : Sn,
                'theoretical_pI' : theoretical_pI,
                'mol weight' : m_weight
            }

            return res
        except Exception:
            print("Error calculate physicochemical")
|
|
|
class ClassicalDocking:
    """Pairwise force-field scoring of epitopes against receptor panels.

    For every epitope x receptor pair it computes centers of mass, distance,
    Lennard-Jones attractive/repulsive/total terms and Coulomb energy, writes
    one .xlsx report per cell type and returns the raw result dicts.
    """

    def __init__(self, bseq, tseq, base_path, target_path, n_receptor):
        self.bseq = bseq                # B-cell epitope sequences
        self.tseq = tseq                # T-cell epitope sequences
        self.base_path = base_path      # kept for API parity with sibling classes
        self.target_path = target_path  # output directory for .xlsx reports
        self.n_receptor = n_receptor    # receptors sampled from each panel

    def _load_receptors(self, filename):
        """Load a receptor panel CSV, shuffle deterministically (random_state=42)
        and keep the first n_receptor rows."""
        df = pd.read_csv(os.path.join(data_dir, filename))
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        return df[0:self.n_receptor]

    def _score_pairs(self, epitopes, receptor_df):
        """Score every epitope x receptor pair; returns the result-table dict.

        Shared by the B and T passes (the two loops were previously duplicated
        verbatim); evaluation order of the energy terms is preserved.
        """
        seq1, seq2, seq2id = [], [], []
        com1, com2 = [], []
        Attractive, Repulsive = [], []
        VDW_lj_force, coulomb_energy, force_field = [], [], []

        for epitope in epitopes:
            for i in range(len(receptor_df)):
                seq1.append(epitope)
                seq2.append(receptor_df['seq'][i])
                seq2id.append(receptor_df['id'][i])
                # Centers of mass are recomputed per pair (not hoisted) in case
                # the underlying computation is not deterministic/pure.
                aa1 = molecular_function.calculate_amino_acid_center_of_mass(epitope)
                com1.append(aa1)
                aa2 = molecular_function.calculate_amino_acid_center_of_mass(receptor_df['seq'][i])
                com2.append(aa2)
                dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2)
                attract = ms.attractive_energy(dist)
                Attractive.append(attract)
                repulsive = ms.repulsive_energy(dist)
                Repulsive.append(repulsive)
                vdw = ms.lj_force(dist)
                VDW_lj_force.append(vdw)
                # Elementary charge on both partners (scipy.constants.e).
                ce = ms.coulomb_energy(e, e, dist)
                coulomb_energy.append(ce)
                force_field.append(vdw + ce)

        return {
            'Ligand' : seq1,
            'Receptor' : seq2,
            'Receptor id' : seq2id,
            'Center Of Ligand Mass' : com1,
            'Center Of Receptor Mass' : com2,
            'Attractive' : Attractive,
            'Repulsive' : Repulsive,
            'VDW LJ Force' : VDW_lj_force,
            'Coulomb Energy' : coulomb_energy,
            'Force Field' : force_field
        }

    def ForceField1(self):
        """Score B and T epitopes against their receptor panels.

        Writes forcefield_b_cell.xlsx / forcefield_t_cell.xlsx into
        target_path and returns (b_res, t_res) dicts.
        """
        b_res = self._score_pairs(self.bseq, self._load_receptors('b cell receptor homo sapiens.csv'))
        b_res_df = pd.DataFrame(b_res)
        print("Force Field B Cell Success")

        t_res = self._score_pairs(self.tseq, self._load_receptors('t cell receptor homo sapiens.csv'))
        t_res_df = pd.DataFrame(t_res)
        print("Force Field T Cell Success")

        b_res_df.to_excel(self.target_path+'/'+'forcefield_b_cell.xlsx', index=False)
        t_res_df.to_excel(self.target_path+'/'+'forcefield_t_cell.xlsx', index=False)

        return b_res, t_res
|
|
|
class ClassicalDockingWithAdjuvant:
    """Force-field scoring of epitope+adjuvant ligands against receptor panels.

    Same scoring scheme as ClassicalDocking, but every epitope is first
    chemically combined with each sampled adjuvant (SMILES) and the combined
    molecule is used as the ligand.
    """

    def __init__(self, bseq, tseq, base_path, target_path, n_receptor, n_adjuvant):
        self.bseq = bseq                # B-cell epitope sequences
        self.tseq = tseq                # T-cell epitope sequences
        self.base_path = base_path      # kept for API parity with sibling classes
        self.target_path = target_path  # output directory for .xlsx reports
        self.n_receptor = n_receptor    # receptors sampled from each panel
        self.n_adjuvant = n_adjuvant    # adjuvants sampled from the PubChem table

    def _load_sampled(self, filename, n_rows):
        """Load a CSV, shuffle deterministically (random_state=42), keep n_rows."""
        df = pd.read_csv(os.path.join(data_dir, filename))
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        return df[0:n_rows]

    def ForceField1(self):
        """Score every epitope+adjuvant combination against every receptor.

        Writes forcefield_{b,t}_cell_with_adjuvant.xlsx into target_path and
        returns (b_res, t_res) result-table dicts.
        """
        adjuvant = self._load_sampled('PubChem_compound_text_adjuvant.csv', self.n_adjuvant)
        b_cell_receptor = self._load_sampled('b cell receptor homo sapiens.csv', self.n_receptor)
        b_epitope = self.bseq

        seq1, seq2, seq2id = [], [], []
        Attractive, Repulsive = [], []
        VDW_lj_force, coulomb_energy, force_field = [], [], []
        adjuvant_list, adjuvant_isosmiles = [], []
        com1, com2 = [], []
        for b in b_epitope:
            for i in range(len(b_cell_receptor)):
                for adju in range(len(adjuvant)):
                    adjuvant_list.append(adjuvant['cid'][adju])
                    adjuvant_isosmiles.append(adjuvant['isosmiles'][adju])
                    # Ligand = epitope covalently combined with the adjuvant.
                    seq_plus_adjuvant = Chem.MolToSmiles(molecular_function.combine_epitope_with_adjuvant(b, adjuvant['isosmiles'][adju]))
                    seq1.append(seq_plus_adjuvant)
                    seq2.append(b_cell_receptor['seq'][i])
                    seq2id.append(b_cell_receptor['id'][i])
                    aa1 = molecular_function.calculate_amino_acid_center_of_mass_smiles(seq_plus_adjuvant)
                    aa2 = molecular_function.calculate_amino_acid_center_of_mass(b_cell_receptor['seq'][i])
                    com1.append(aa1)
                    com2.append(aa2)
                    dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2)
                    attract = ms.attractive_energy(dist)
                    Attractive.append(attract)
                    repulsive = ms.repulsive_energy(dist)
                    Repulsive.append(repulsive)
                    vdw = ms.lj_force(dist)
                    VDW_lj_force.append(vdw)
                    ce = ms.coulomb_energy(e, e, dist)
                    coulomb_energy.append(ce)
                    force_field.append(vdw+ce)
            # Progress separator between epitopes (B pass only, as before).
            print("===========================================\n\n")

        b_res = {
            'Ligand' : seq1,
            'Receptor' : seq2,
            'Receptor id' : seq2id,
            'Adjuvant CID' : adjuvant_list,
            'Adjuvant IsoSMILES' : adjuvant_isosmiles,
            'Attractive' : Attractive,
            'Repulsive' : Repulsive,
            'Center Of Ligand Mass' : com1,
            'Center Of Receptor Mass' : com2,
            'VDW LJ Force' : VDW_lj_force,
            'Coulomb Energy' : coulomb_energy,
            'Force Field' : force_field
        }
        b_res_df = pd.DataFrame(b_res)
        print("Force Field B Cell Success With Adjuvant")

        t_cell_receptor = self._load_sampled('t cell receptor homo sapiens.csv', self.n_receptor)
        t_epitope = self.tseq

        seq1, seq2, seq2id = [], [], []
        Attractive, Repulsive = [], []
        VDW_lj_force, coulomb_energy, force_field = [], [], []
        adjuvant_list, adjuvant_isosmiles = [], []
        com1, com2 = [], []
        for b in t_epitope:
            for i in range(len(t_cell_receptor)):
                for adju in range(len(adjuvant)):
                    adjuvant_list.append(adjuvant['cid'][adju])
                    adjuvant_isosmiles.append(adjuvant['isosmiles'][adju])
                    seq_plus_adjuvant = Chem.MolToSmiles(molecular_function.combine_epitope_with_adjuvant(b, adjuvant['isosmiles'][adju]))
                    seq1.append(seq_plus_adjuvant)
                    # BUG FIX: this loop previously indexed b_cell_receptor while
                    # iterating over t_cell_receptor; it must read the T panel.
                    seq2.append(t_cell_receptor['seq'][i])
                    seq2id.append(t_cell_receptor['id'][i])
                    aa1 = molecular_function.calculate_amino_acid_center_of_mass_smiles(seq_plus_adjuvant)
                    aa2 = molecular_function.calculate_amino_acid_center_of_mass(t_cell_receptor['seq'][i])
                    com1.append(aa1)
                    com2.append(aa2)
                    dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2)
                    attract = ms.attractive_energy(dist)
                    Attractive.append(attract)
                    repulsive = ms.repulsive_energy(dist)
                    Repulsive.append(repulsive)
                    vdw = ms.lj_force(dist)
                    VDW_lj_force.append(vdw)
                    ce = ms.coulomb_energy(e, e, dist)
                    coulomb_energy.append(ce)
                    force_field.append(vdw+ce)

        t_res = {
            'Ligand' : seq1,
            'Receptor' : seq2,
            'Receptor id' : seq2id,
            'Adjuvant CID' : adjuvant_list,
            'Adjuvant IsoSMILES' : adjuvant_isosmiles,
            'Attractive' : Attractive,
            'Repulsive' : Repulsive,
            'Center Of Ligand Mass' : com1,
            'Center Of Receptor Mass' : com2,
            'VDW LJ Force' : VDW_lj_force,
            'Coulomb Energy' : coulomb_energy,
            'Force Field' : force_field
        }
        t_res_df = pd.DataFrame(t_res)
        print("Force Field T Cell Success With Adjuvant")

        b_res_df.to_excel(self.target_path+'/'+'forcefield_b_cell_with_adjuvant.xlsx', index=False)
        t_res_df.to_excel(self.target_path+'/'+'forcefield_t_cell_with_adjuvant.xlsx', index=False)

        return b_res, t_res
|
|
|
class MLDocking:
    """Machine-learning-based docking score for epitope x receptor pairs.

    Builds a [center-of-mass ligand, mol weight ligand, center-of-mass
    receptor, mol weight receptor, distance] feature vector per pair and
    scores it with the pickled regression model via ml_dock().
    """

    def __init__(self, bseq, tseq, base_path, target_path, n_receptor):
        self.bseq = bseq                # B-cell epitope sequences
        self.tseq = tseq                # T-cell epitope sequences
        self.base_path = base_path      # kept for API parity with sibling classes
        self.target_path = target_path  # output directory for .xlsx reports
        self.n_receptor = n_receptor    # receptors sampled from each panel

    def _load_receptors(self, filename):
        """Load a receptor panel CSV, shuffle deterministically (random_state=42)
        and keep the first n_receptor rows."""
        df = pd.read_csv(os.path.join(data_dir, filename))
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        return df[0:self.n_receptor]

    def _score_pairs(self, epitopes, receptor_df):
        """Feature-build and ml_dock-score every epitope x receptor pair.

        Shared by the B and T passes (previously duplicated verbatim).
        """
        seq1, seq2, seq2id = [], [], []
        smilesseq1, smilesseq2 = [], []
        molwseq1, molwseq2 = [], []
        bdist, bmol_pred = [], []
        com1, com2 = [], []

        for b in epitopes:
            for i in range(len(receptor_df)):
                seq1.append(b)
                seq2.append(receptor_df['seq'][i])
                seq2id.append(receptor_df['id'][i])
                aa1 = molecular_function.calculate_amino_acid_center_of_mass(b)
                smiles1 = molecular_function.seq_to_smiles(b)
                smilesseq1.append(smiles1)
                molwt1 = molecular_function.calculate_molecular_weight(smiles1)
                molwseq1.append(molwt1)
                aa2 = molecular_function.calculate_amino_acid_center_of_mass(receptor_df['seq'][i])
                smiles2 = molecular_function.seq_to_smiles(receptor_df['seq'][i])
                smilesseq2.append(smiles2)
                molwt2 = molecular_function.calculate_molecular_weight(smiles2)
                molwseq2.append(molwt2)
                dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2)
                bdist.append(dist)
                com1.append(aa1)
                com2.append(aa2)
                # Feature vector layout expected by the pickled regressor.
                feature = [aa1, molwt1, aa2, molwt2, dist]
                bmol_pred.append(ml_dock(feature))

        return {
            'Ligand' : seq1,
            'Receptor' : seq2,
            'Receptor id' : seq2id,
            'Ligand Smiles' : smilesseq1,
            'Receptor Smiles' : smilesseq2,
            'Center Of Mass Ligand' : com1,
            'Center Of Mass Receptor' : com2,
            'Molecular Weight Of Ligand' : molwseq1,
            'Molecular Weight Of Receptor' : molwseq2,
            'Distance' : bdist,
            'Docking(Ki (nM))' : bmol_pred
        }

    def MLDock1(self):
        """Score B and T epitopes against the v2 receptor panels.

        Writes ml_scoring_func_{b,t}_cell.xlsx into target_path and returns
        (b_res, t_res) dicts.
        """
        b_res = self._score_pairs(self.bseq, self._load_receptors('b_receptor_v2.csv'))
        b_res_df = pd.DataFrame(b_res)
        print("Machine Learning Based Scoring Function of B Cell Success")

        t_res = self._score_pairs(self.tseq, self._load_receptors('t_receptor_v2.csv'))
        t_res_df = pd.DataFrame(t_res)
        print("Machine Learning Based Scoring Function of T Cell Success")

        b_res_df.to_excel(self.target_path+'/'+'ml_scoring_func_b_cell.xlsx', index=False)
        t_res_df.to_excel(self.target_path+'/'+'ml_scoring_func_t_cell.xlsx', index=False)

        return b_res, t_res
|
|
|
class MLDockingWithAdjuvant:
    """ML-based docking score for epitope+adjuvant ligands vs receptor panels.

    Same feature/scoring scheme as MLDocking, but the ligand geometry/weight
    come from the epitope combined with each sampled adjuvant (SMILES).
    The 'Ligand' column still reports the plain epitope sequence.
    """

    def __init__(self, bseq, tseq, base_path, target_path, n_receptor, n_adjuvant):
        self.bseq = bseq                # B-cell epitope sequences
        self.tseq = tseq                # T-cell epitope sequences
        self.base_path = base_path      # kept for API parity with sibling classes
        self.target_path = target_path  # output directory for .xlsx reports
        self.n_receptor = n_receptor    # receptors sampled from each panel
        self.n_adjuvant = n_adjuvant    # adjuvants sampled from the PubChem table

    def _load_sampled(self, filename, n_rows):
        """Load a CSV, shuffle deterministically (random_state=42), keep n_rows."""
        df = pd.read_csv(os.path.join(data_dir, filename))
        df = df.sample(frac=1, random_state=42).reset_index(drop=True)
        return df[0:n_rows]

    def _score_pairs(self, epitopes, receptor_df, adjuvant):
        """Feature-build and ml_dock-score every epitope x receptor x adjuvant
        triple; shared by the B and T passes (previously duplicated, with a
        receptor-panel copy-paste bug in the T pass)."""
        seq1, seq2, seq2id = [], [], []          # duplicate seq2id init removed
        adjuvant_list, adjuvant_isosmiles = [], []
        smilesseq1, smilesseq2 = [], []
        molwseq1, molwseq2 = [], []
        bdist, bmol_pred = [], []
        com1, com2 = [], []

        for b in epitopes:
            for i in range(len(receptor_df)):
                for adju in range(len(adjuvant)):
                    adjuvant_list.append(adjuvant['cid'][adju])
                    adjuvant_isosmiles.append(adjuvant['isosmiles'][adju])
                    seq_plus_adjuvant = Chem.MolToSmiles(molecular_function.combine_epitope_with_adjuvant(b, adjuvant['isosmiles'][adju]))
                    seq1.append(b)
                    seq2.append(receptor_df['seq'][i])
                    seq2id.append(receptor_df['id'][i])
                    aa1 = molecular_function.calculate_amino_acid_center_of_mass_smiles(seq_plus_adjuvant)
                    smiles1 = seq_plus_adjuvant
                    smilesseq1.append(smiles1)
                    molwt1 = molecular_function.calculate_molecular_weight(smiles1)
                    molwseq1.append(molwt1)
                    aa2 = molecular_function.calculate_amino_acid_center_of_mass(receptor_df['seq'][i])
                    smiles2 = molecular_function.seq_to_smiles(receptor_df['seq'][i])
                    smilesseq2.append(smiles2)
                    molwt2 = molecular_function.calculate_molecular_weight(smiles2)
                    molwseq2.append(molwt2)
                    dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2)
                    bdist.append(dist)
                    feature = [aa1, molwt1, aa2, molwt2, dist]
                    com1.append(aa1)
                    com2.append(aa2)
                    bmol_pred.append(ml_dock(feature))

        return {
            'Ligand' : seq1,
            'Receptor' : seq2,
            'Receptor id' : seq2id,
            'Adjuvant CID' : adjuvant_list,
            'Adjuvant IsoSMILES' : adjuvant_isosmiles,
            'Ligand Smiles' : smilesseq1,
            'Receptor Smiles' : smilesseq2,
            'Center Of Mass Ligand' : com1,
            'Center Of Mass Receptor' : com2,
            'Molecular Weight Of Ligand' : molwseq1,
            'Molecular Weight Of Receptor' : molwseq2,
            'Distance' : bdist,
            'Docking(Ki (nM))' : bmol_pred
        }

    def MLDock1(self):
        """Score B and T epitope+adjuvant combinations against receptor panels.

        Writes ml_{b,t}_cell_with_adjuvant.xlsx into target_path and returns
        (b_res, t_res) dicts.
        """
        adjuvant = self._load_sampled('PubChem_compound_text_adjuvant.csv', self.n_adjuvant)

        b_res = self._score_pairs(self.bseq, self._load_sampled('b_receptor_v2.csv', self.n_receptor), adjuvant)
        b_res_df = pd.DataFrame(b_res)
        print("Machine Learning Based Scoring Function of B Cell Success With Adjuvant")

        # FIX(consistency): previously loaded 't cell receptor homo sapiens.csv'
        # here, unlike the B side and MLDocking which use the *_v2 ML panels —
        # this looked like a copy-paste from the classical docking class.
        t_res = self._score_pairs(self.tseq, self._load_sampled('t_receptor_v2.csv', self.n_receptor), adjuvant)
        t_res_df = pd.DataFrame(t_res)
        print("Machine Learning Based Scoring Function of T Cell Success With Adjuvant")

        b_res_df.to_excel(self.target_path+'/'+'ml_b_cell_with_adjuvant.xlsx', index=False)
        t_res_df.to_excel(self.target_path+'/'+'ml_t_cell_with_adjuvant.xlsx', index=False)

        return b_res, t_res
|
|
|
class QMLDocking: |
|
    def __init__(self, bseq, tseq, base_path, target_path, n_receptor, sampler=None):
        """Store docking inputs; quantum scoring happens in MLDock1()."""
        # B-cell / T-cell epitope sequence lists to be docked.
        self.bseq = bseq
        self.tseq = tseq
        # Project root and output directory for result spreadsheets.
        self.base_path = base_path
        self.target_path = target_path
        # Number of receptors sampled from each receptor panel CSV.
        self.n_receptor = n_receptor
        # Optional Qiskit sampler forwarded to qml_dock(); when None, qml_dock
        # loads the locally saved VQR model (see qml_dock at top of file).
        self.sampler = sampler
|
|
|
def MLDock1(self): |
|
b_cell_receptor = pd.read_csv(os.path.join(data_dir, 'b_receptor_v2.csv')) |
|
b_cell_receptor = b_cell_receptor.sample(frac=1, random_state=42).reset_index(drop=True) |
|
b_cell_receptor = b_cell_receptor[0:self.n_receptor] |
|
b_epitope = self.bseq |
|
|
|
seq1 = [] |
|
seq2 = [] |
|
seq2id = [] |
|
smilesseq1 = [] |
|
smilesseq2 = [] |
|
molwseq1 = [] |
|
molwseq2 = [] |
|
bdist = [] |
|
bmol_pred = [] |
|
com1 = [] |
|
com2 = [] |
|
|
|
for b in b_epitope: |
|
for i in range(len(b_cell_receptor)): |
|
seq1.append(b) |
|
seq2.append(b_cell_receptor['seq'][i]) |
|
seq2id.append(b_cell_receptor['id'][i]) |
|
aa1 = molecular_function.calculate_amino_acid_center_of_mass(b) |
|
smiles1 = molecular_function.seq_to_smiles(b) |
|
smilesseq1.append(smiles1) |
|
molwt1 = molecular_function.calculate_molecular_weight(smiles1) |
|
molwseq1.append(molwt1) |
|
aa2 = molecular_function.calculate_amino_acid_center_of_mass(b_cell_receptor['seq'][i]) |
|
smiles2 = molecular_function.seq_to_smiles(b_cell_receptor['seq'][i]) |
|
smilesseq2.append(smiles2) |
|
molwt2 = molecular_function.calculate_molecular_weight(smiles2) |
|
molwseq2.append(molwt2) |
|
dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2) |
|
bdist.append(dist) |
|
com1.append(aa1) |
|
com2.append(aa2) |
|
feature = [aa1, molwt1, aa2, molwt2, dist] |
|
|
|
bmol_pred.append(qml_dock(feature, self.sampler)) |
|
|
|
b_res = { |
|
'Ligand' : seq1, |
|
'Receptor' : seq2, |
|
'Receptor id' : seq2id, |
|
'Ligand Smiles' : smilesseq1, |
|
'Receptor Smiles' : smilesseq2, |
|
'Center Of Mass Ligand' : com1, |
|
'Center Of Mass Receptor' : com2, |
|
'Molecular Weight Of Ligand' : molwseq1, |
|
'Molecular Weight Of Receptor' : molwseq2, |
|
'Distance' : bdist, |
|
'Docking(Ki (nM))' : bmol_pred |
|
} |
|
|
|
b_res_df = pd.DataFrame(b_res) |
|
print("Quantum Machine Learning Based Scoring Function of B Cell Success") |
|
|
|
|
|
t_cell_receptor = pd.read_csv(os.path.join(data_dir, 't_receptor_v2.csv')) |
|
t_cell_receptor = t_cell_receptor.sample(frac=1, random_state=42).reset_index(drop=True) |
|
t_cell_receptor = t_cell_receptor[0:self.n_receptor] |
|
t_epitope = self.tseq |
|
|
|
seq1 = [] |
|
seq2 = [] |
|
seq2id = [] |
|
smilesseq1 = [] |
|
smilesseq2 = [] |
|
molwseq1 = [] |
|
molwseq2 = [] |
|
bdist = [] |
|
bmol_pred = [] |
|
com1 = [] |
|
com2 = [] |
|
|
|
for b in t_epitope: |
|
for i in range(len(t_cell_receptor)): |
|
seq1.append(b) |
|
seq2.append(t_cell_receptor['seq'][i]) |
|
seq2id.append(t_cell_receptor['id'][i]) |
|
aa1 = molecular_function.calculate_amino_acid_center_of_mass(b) |
|
smiles1 = molecular_function.seq_to_smiles(b) |
|
smilesseq1.append(smiles1) |
|
molwt1 = molecular_function.calculate_molecular_weight(smiles1) |
|
molwseq1.append(molwt1) |
|
aa2 = molecular_function.calculate_amino_acid_center_of_mass(t_cell_receptor['seq'][i]) |
|
smiles2 = molecular_function.seq_to_smiles(t_cell_receptor['seq'][i]) |
|
smilesseq2.append(smiles2) |
|
molwt2 = molecular_function.calculate_molecular_weight(smiles2) |
|
molwseq2.append(molwt2) |
|
dist = molecular_function.calculate_distance_between_amino_acids(aa1, aa2) |
|
bdist.append(dist) |
|
com1.append(aa1) |
|
com2.append(aa2) |
|
feature = [aa1, molwt1, aa2, molwt2, dist] |
|
|
|
bmol_pred.append(qml_dock(feature, self.sampler)) |
|
|
|
t_res = { |
|
'Ligand' : seq1, |
|
'Receptor' : seq2, |
|
'Receptor id' : seq2id, |
|
'Ligand Smiles' : smilesseq1, |
|
'Receptor Smiles' : smilesseq2, |
|
'Center Of Mass Ligand' : com1, |
|
'Center Of Mass Receptor' : com2, |
|
'Molecular Weight Of Ligand' : molwseq1, |
|
'Molecular Weight Of Receptor' : molwseq2, |
|
'Distance' : bdist, |
|
'Docking(Ki (nM))' : bmol_pred |
|
} |
|
|
|
t_res_df = pd.DataFrame(t_res) |
|
print("Machine Learning Based Scoring Function of T Cell Success") |
|
|
|
b_res_df.to_excel(self.target_path+'/'+'qml_scoring_func_b_cell.xlsx', index=False) |
|
t_res_df.to_excel(self.target_path+'/'+'qml_scoring_func_t_cell.xlsx', index=False) |
|
|
|
return b_res, t_res |
|
|
|
class QMLDockingWithAdjuvant:
    """Quantum-ML docking scores for (epitope + adjuvant) ligands vs receptors.

    Each epitope is combined with each adjuvant SMILES before scoring
    against each sampled receptor, so the result contains one row per
    (epitope, receptor, adjuvant) triple.
    """

    def __init__(self, bseq, tseq, base_path, target_path, n_receptor, n_adjuvant, sampler=None):
        self.bseq = bseq                # iterable of B-cell epitope sequences
        self.tseq = tseq                # iterable of T-cell epitope sequences
        self.base_path = base_path      # kept for interface parity; unused here
        self.target_path = target_path  # output directory for the .xlsx reports
        self.n_receptor = n_receptor    # number of receptor rows to keep
        self.n_adjuvant = n_adjuvant    # number of adjuvant rows to keep
        self.sampler = sampler          # optional qiskit sampler forwarded to qml_dock

    def _load_table(self, csv_name, n_rows):
        """Read a CSV, shuffle deterministically and keep the first n_rows."""
        table = pd.read_csv(os.path.join(data_dir, csv_name))
        table = table.sample(frac=1, random_state=42).reset_index(drop=True)
        return table[0:n_rows]

    def _score_pairs(self, epitopes, receptors, adjuvants):
        """Dock (epitope+adjuvant) against every receptor; return the result dict.

        Note: the original code initialized ``seq2id`` twice; that redundancy
        is gone in this rewrite.
        """
        ligands, receptor_seqs, receptor_ids = [], [], []
        adjuvant_cids, adjuvant_isosmiles = [], []
        ligand_smiles, receptor_smiles = [], []
        ligand_weights, receptor_weights = [], []
        ligand_coms, receptor_coms = [], []
        distances, predictions = [], []

        for epitope in epitopes:
            for i in range(len(receptors)):
                for j in range(len(adjuvants)):
                    adjuvant_cids.append(adjuvants['cid'][j])
                    adjuvant_isosmiles.append(adjuvants['isosmiles'][j])
                    # The ligand is the epitope fused with the adjuvant,
                    # expressed as a single SMILES string.
                    combined = Chem.MolToSmiles(
                        molecular_function.combine_epitope_with_adjuvant(
                            epitope, adjuvants['isosmiles'][j]))

                    ligands.append(epitope)
                    receptor_seqs.append(receptors['seq'][i])
                    receptor_ids.append(receptors['id'][i])

                    com1 = molecular_function.calculate_amino_acid_center_of_mass_smiles(combined)
                    ligand_smiles.append(combined)
                    molwt1 = molecular_function.calculate_molecular_weight(combined)
                    ligand_weights.append(molwt1)

                    com2 = molecular_function.calculate_amino_acid_center_of_mass(receptors['seq'][i])
                    smiles2 = molecular_function.seq_to_smiles(receptors['seq'][i])
                    receptor_smiles.append(smiles2)
                    molwt2 = molecular_function.calculate_molecular_weight(smiles2)
                    receptor_weights.append(molwt2)

                    dist = molecular_function.calculate_distance_between_amino_acids(com1, com2)
                    distances.append(dist)
                    ligand_coms.append(com1)
                    receptor_coms.append(com2)

                    predictions.append(qml_dock([com1, molwt1, com2, molwt2, dist], self.sampler))

        return {
            'Ligand': ligands,
            'Receptor': receptor_seqs,
            'Receptor id': receptor_ids,
            'Adjuvant CID': adjuvant_cids,
            'Adjuvant IsoSMILES': adjuvant_isosmiles,
            'Ligand Smiles': ligand_smiles,
            'Receptor Smiles': receptor_smiles,
            'Center Of Mass Ligand': ligand_coms,
            'Center Of Mass Receptor': receptor_coms,
            'Molecular Weight Of Ligand': ligand_weights,
            'Molecular Weight Of Receptor': receptor_weights,
            'Distance': distances,
            'Docking(Ki (nM))': predictions,
        }

    def MLDock1(self):
        """Score B/T epitopes with adjuvants; write xlsx reports; return dicts."""
        adjuvants = self._load_table('PubChem_compound_text_adjuvant.csv', self.n_adjuvant)

        b_receptors = self._load_table('b_receptor_v2.csv', self.n_receptor)
        b_res = self._score_pairs(self.bseq, b_receptors, adjuvants)
        b_res_df = pd.DataFrame(b_res)
        print("Machine Learning Based Scoring Function of B Cell Success With Adjuvant")

        # NOTE(review): the B pass reads 'b_receptor_v2.csv' but the T pass
        # reads 't cell receptor homo sapiens.csv' (QMLDocking uses
        # 't_receptor_v2.csv') — confirm this asymmetry is intentional.
        t_receptors = self._load_table('t cell receptor homo sapiens.csv', self.n_receptor)
        t_res = self._score_pairs(self.tseq, t_receptors, adjuvants)
        t_res_df = pd.DataFrame(t_res)
        print("Machine Learning Based Scoring Function of T Cell Success With Adjuvant")

        b_res_df.to_excel(self.target_path + '/' + 'qml_b_cell_with_adjuvant.xlsx', index=False)
        t_res_df.to_excel(self.target_path + '/' + 'qml_t_cell_with_adjuvant.xlsx', index=False)

        return b_res, t_res
|
|
|
|
|
class molecular_function:
    """Stateless helpers for converting peptide sequences to RDKit mols and
    computing simple mass / center-of-mass descriptors.

    NOTE(review): the original class body defined
    ``calculate_amino_acid_center_of_mass`` three times and
    ``seq_to_smiles`` / ``calculate_distance_between_amino_acids`` /
    ``calculate_molecular_center_of_mass`` / ``calculate_molecular_weight``
    twice each.  In a Python class body the last definition wins, so only
    the effective (last) version of each method is kept here.  The rdkit
    imports that sat inside the class body were removed as well — the same
    modules are already imported at the top of the file.  All methods are
    now ``@staticmethod`` since they are only ever invoked on the class.
    """

    @staticmethod
    def MolFromLongSequence(sequence, chunk_size=100):
        """Convert a long sequence into a Mol object by splitting into chunks and combining."""
        chunks = [sequence[i:i + chunk_size] for i in range(0, len(sequence), chunk_size)]
        mols = [Chem.MolFromSequence(chunk) for chunk in chunks if chunk]
        combined_mol = Chem.Mol()  # start from an empty molecule
        for mol in mols:
            if mol:  # skip chunks that failed to parse (MolFromSequence -> None)
                combined_mol = Chem.CombineMols(combined_mol, mol)
        return combined_mol

    @staticmethod
    def calculate_amino_acid_center_of_mass(sequence):
        """Mass-weighted average residue position (1-based index) of *sequence*.

        Returns 0 if any residue fails to parse or on any other error
        (e.g. an empty sequence causing division by zero).
        """
        try:
            residue_masses = []
            for aa in sequence:
                try:
                    residue_masses.append(
                        Descriptors.MolWt(molecular_function.MolFromLongSequence(aa)))
                except Exception:
                    # Original code had an unreachable `break` after this
                    # return; it has been dropped.
                    return 0
            total_mass = sum(residue_masses)
            return sum(i * m for i, m in enumerate(residue_masses, start=1)) / total_mass
        except Exception:
            return 0

    @staticmethod
    def calculate_amino_acid_center_of_mass_smiles(sequence):
        """Like calculate_amino_acid_center_of_mass, but parses each character
        of *sequence* as SMILES.

        NOTE(review): iterating a SMILES string one character at a time is
        chemically questionable (multi-character tokens get split up); any
        unparsable character makes the whole call return 0.  Behavior kept.
        """
        try:
            masses = [Descriptors.MolWt(Chem.MolFromSmiles(ch)) for ch in sequence]
            total_mass = sum(masses)
            return sum(i * m for i, m in enumerate(masses, start=1)) / total_mass
        except Exception:
            return 0

    @staticmethod
    def calculate_distance_between_amino_acids(aa1, aa2):
        """Absolute difference between two scalar center-of-mass values."""
        return abs(aa1 - aa2)

    @staticmethod
    def generate_conformer(molecule_smiles):
        """Parse SMILES, add explicit hydrogens and embed one 3-D conformer.

        The status code returned by EmbedMolecule is intentionally ignored;
        the caller (calculate_molecular_center_of_mass) wraps GetConformer
        in a try/except of its own.
        """
        mol = Chem.MolFromSmiles(molecule_smiles)
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol, useRandomCoords=True, randomSeed=42)
        return mol

    @staticmethod
    def calculate_molecular_center_of_mass(smiles):
        """Sum of the centroid coordinates of an embedded conformer of *smiles*.

        NOTE(review): ``sum(centroid[i] * M) / M`` algebraically reduces to
        plain ``sum(centroid)`` — the molecular weight cancels.  Confirm this
        is the intended descriptor.  Returns None on any failure.
        """
        molecule = molecular_function.generate_conformer(smiles)
        try:
            if molecule is None:
                return None
            centroid = rdMolTransforms.ComputeCentroid(molecule.GetConformer())
            total_mass = Descriptors.MolWt(molecule)
            return sum(centroid[i] * total_mass for i in range(len(centroid))) / total_mass
        except Exception as exc:
            print("Error:", str(exc))
            return None

    @staticmethod
    def seq_to_smiles(seq):
        """Convert an amino-acid sequence to kekulized SMILES; None on failure."""
        try:
            mol = molecular_function.MolFromLongSequence(seq)
            return str(Chem.MolToSmiles(mol, kekuleSmiles=True))
        except Exception:
            return None

    @staticmethod
    def combine_epitope_with_adjuvant(epitope_sequence, adjuvant_smiles):
        """Combine epitope and adjuvant into a single (disconnected) Mol.

        NOTE(review): the adjuvant argument is a SMILES string but is fed
        through MolFromLongSequence, which parses amino-acid sequences —
        verify this is intended rather than Chem.MolFromSmiles.
        """
        epitope_molecule = molecular_function.MolFromLongSequence(epitope_sequence)
        adjuvant_molecule = molecular_function.MolFromLongSequence(adjuvant_smiles)
        # CombineMols concatenates the two molecules without bonding them.
        return Chem.CombineMols(adjuvant_molecule, epitope_molecule)

    @staticmethod
    def calculate_molecular_weight(molecule_smiles):
        """Average molecular weight of *molecule_smiles*; None if unparsable."""
        try:
            mol = Chem.MolFromSmiles(molecule_smiles)
            if mol is None:
                print("Gagal membaca molekul.")  # (Indonesian) "failed to read molecule"
                return None
            return Descriptors.MolWt(mol)
        except Exception:
            return None
|
|
|
class ms:
    """Toy 1-D Lennard-Jones / Coulomb molecular-simulation helpers for argon.

    Units throughout: energies in eV, distances in Å, masses in amu,
    velocities in eVs/Åamu.
    """

    # Bug fix: the mass must be a CLASS attribute.  It was previously only
    # assigned to instances inside __init__, so the static methods below
    # that read ``ms.mass_of_argon`` raised AttributeError at call time.
    mass_of_argon = 39.948  # amu

    def __init__(self):
        # Kept for backward compatibility with code that instantiates ms()
        # and reads the attribute through the instance.
        self.mass_of_argon = ms.mass_of_argon

    @staticmethod
    def attractive_energy(r, epsilon=0.0103, sigma=3.4):
        """
        Attractive component of the Lennard-Jones interaction energy.

        Parameters
        ----------
        r: float
            Distance between two particles (Å)
        epsilon: float
            Negative of the potential energy at the equilibrium bond length (eV)
        sigma: float
            Distance at which the potential energy is zero (Å)

        Returns
        -------
        float
            Energy of the attractive component (eV); 0 at r == 0 to avoid
            division by zero.
        """
        if r == 0:
            return 0
        return -4.0 * epsilon * np.power(sigma / r, 6)

    @staticmethod
    def repulsive_energy(r, epsilon=0.0103, sigma=3.4):
        """
        Repulsive component of the Lennard-Jones interaction energy.

        Parameters
        ----------
        r: float
            Distance between two particles (Å)
        epsilon: float
            Negative of the potential energy at the equilibrium bond length (eV)
        sigma: float
            Distance at which the potential energy is zero (Å)

        Returns
        -------
        float
            Energy of the repulsive component (eV); 0 at r == 0.
        """
        if r == 0:
            return 0
        return 4 * epsilon * np.power(sigma / r, 12)

    @staticmethod
    def lj_energy(r, epsilon=0.0103, sigma=3.4):
        """
        Full Lennard-Jones potential energy (repulsive + attractive).

        Parameters
        ----------
        r: float
            Distance between two particles (Å)
        epsilon: float
            Negative of the potential energy at the equilibrium bond length (eV)
        sigma: float
            Distance at which the potential energy is zero (Å)

        Returns
        -------
        float
            Energy of the Lennard-Jones potential model (eV); 0 at r == 0.
        """
        if r == 0:
            return 0
        return ms.repulsive_energy(r, epsilon, sigma) + ms.attractive_energy(r, epsilon, sigma)

    @staticmethod
    def coulomb_energy(qi, qj, r):
        """
        Coulomb interaction energy between two point charges.

        Parameters
        ----------
        qi: float
            Electronic charge on particle i (units of the elementary charge)
        qj: float
            Electronic charge on particle j
        r: float
            Distance between particles i and j (Å)

        Returns
        -------
        float
            Energy of the Coulombic interaction (eV); 0 at r == 0.
        """
        if r == 0:
            return 0
        # r is in Å, hence the 1e-10 factor; result converted from J to eV.
        energy_joules = (qi * qj * e ** 2) / (4 * np.pi * epsilon_0 * r * 1e-10)
        return energy_joules / 1.602e-19

    @staticmethod
    def bonded(kb, b0, b):
        """
        Harmonic bond potential energy: kb/2 * (b - b0)**2.

        Parameters
        ----------
        kb: float
            Bond force constant (eV/Å^2)
        b0: float
            Equilibrium bond length (Å)
        b: float
            Bond length (Å)

        Returns
        -------
        float
            Energy of the bonded interaction (eV)
        """
        return kb / 2 * (b - b0) ** 2

    @staticmethod
    def lj_force(r, epsilon=0.0103, sigma=3.4):
        """
        Lennard-Jones force of the interaction:
        F = 48*eps*sigma^12/r^13 - 24*eps*sigma^6/r^7.

        Parameters
        ----------
        r: float
            Distance between two particles (Å)
        epsilon: float
            Potential energy at the equilibrium bond length (eV)
        sigma: float
            Distance at which the potential energy is zero (Å)

        Returns
        -------
        float
            Force of the van der Waals interaction (eV/Å); 0 at r == 0.
        """
        if r == 0:
            return 0
        return (48 * epsilon * np.power(sigma, 12) / np.power(r, 13)
                - 24 * epsilon * np.power(sigma, 6) / np.power(r, 7))

    @staticmethod
    def init_velocity(T, number_of_particles):
        """
        Initialise random velocities for a series of particles.

        Parameters
        ----------
        T: float
            Temperature of the system at initialisation (K)
        number_of_particles: int
            Number of particles in the system

        Returns
        -------
        ndarray of floats
            Initial velocities (eVs/Åamu): uniform in [-0.5, 0.5) scaled by
            sqrt(kB*T / m).
        """
        R = np.random.rand(number_of_particles) - 0.5
        return R * np.sqrt(Boltzmann * T / (ms.mass_of_argon * 1.602e-19))

    @staticmethod
    def get_accelerations(positions):
        """
        Pairwise Lennard-Jones accelerations for 1-D particle positions.

        Parameters
        ----------
        positions: ndarray of floats
            The positions, in a single dimension, for all of the particles

        Returns
        -------
        ndarray of floats
            The acceleration on each particle (eV/Åamu)
        """
        accel_x = np.zeros((positions.size, positions.size))
        for i in range(0, positions.size - 1):
            for j in range(i + 1, positions.size):
                r_x = positions[j] - positions[i]
                rmag = np.sqrt(r_x * r_x)
                force_scalar = ms.lj_force(rmag, 0.0103, 3.4)
                force_x = force_scalar * r_x / rmag
                accel_x[i, j] = force_x / ms.mass_of_argon
                # Newton's third law: equal and opposite on particle j.
                accel_x[j, i] = - force_x / ms.mass_of_argon
        return np.sum(accel_x, axis=0)

    @staticmethod
    def update_pos(x, v, a, dt):
        """
        Velocity-Verlet position update: x + v*dt + a*dt^2/2.

        Parameters
        ----------
        x, v, a: ndarray of floats
            Positions, velocities and accelerations in a single dimension
        dt: float
            The timestep length

        Returns
        -------
        ndarray of floats
            New positions of the particles in a single dimension
        """
        return x + v * dt + 0.5 * a * dt * dt

    @staticmethod
    def update_velo(v, a, a1, dt):
        """
        Velocity-Verlet velocity update using the accelerations of the
        previous (a) and current (a1) timesteps.

        Parameters
        ----------
        v: ndarray of floats
            Velocities in a single dimension (eVs/Åamu)
        a, a1: ndarray of floats
            Accelerations at the previous / current timestep (eV/Åamu)
        dt: float
            The timestep length

        Returns
        -------
        ndarray of floats
            New velocities of the particles in a single dimension (eVs/Åamu)
        """
        return v + 0.5 * (a + a1) * dt

    @staticmethod
    def run_md(dt, number_of_steps, initial_temp, x):
        """
        Run a velocity-Verlet MD simulation for three particles.

        Parameters
        ----------
        dt: float
            The timestep length (s)
        number_of_steps: int
            Number of iterations in the simulation
        initial_temp: float
            Temperature of the system at initialisation (K)
        x: ndarray of floats
            The initial positions of the particles in a single dimension (Å)

        Returns
        -------
        ndarray of floats
            Positions (number_of_steps, 3) for all of the particles
            throughout the simulation (Å)
        """
        positions = np.zeros((number_of_steps, 3))
        v = ms.init_velocity(initial_temp, 3)
        a = ms.get_accelerations(x)
        for step in range(number_of_steps):
            x = ms.update_pos(x, v, a, dt)
            a1 = ms.get_accelerations(x)
            v = ms.update_velo(v, a, a1, dt)
            a = np.array(a1)
            positions[step, :] = x
        return positions

    @staticmethod
    def lj_force_cutoff(r, epsilon, sigma):
        """
        Lennard-Jones force with a hard 15 Å cutoff.

        Parameters
        ----------
        r: float
            Distance between two particles (Å)
        epsilon: float
            Potential energy at the equilibrium bond length (eV)
        sigma: float
            Distance at which the potential energy is zero (Å)

        Returns
        -------
        float
            Force of the van der Waals interaction (eV/Å); 0 beyond cutoff.

        NOTE(review): below the cutoff this computes
        48*eps*(sigma/r)^13 - 24*eps*(sigma/r)^7, which differs from
        lj_force by a factor of sigma — confirm which form is intended.
        Behavior kept as-is.
        """
        cutoff = 15
        if r < cutoff:
            return (48 * epsilon * np.power(sigma / r, 13)
                    - 24 * epsilon * np.power(sigma / r, 7))
        else:
            return 0
|
|
|
def main():
    """CLI entry point: parse key=value arguments from sys.argv, then run
    both the classical (ReVa) and quantum (QReVa) pipelines, writing
    outputs into a fresh timestamped folder under ``result/``.
    """
    arguments = sys.argv[1:]

    # Defaults for every recognized option.
    sequence = ""
    n_receptor = 0
    n_adjuvant = 0
    blast_activate = False
    llm_url = ""
    alphafold_url = ""

    for arg in arguments:
        # Bug fix: the original ``arg.split("=")`` raised ValueError when a
        # value itself contained '=' (URLs with query strings — llm_url and
        # alphafold_url are expected inputs) or when a token had no '='.
        # partition() splits on the FIRST '=' only and lets us report
        # malformed tokens instead of crashing.
        key, sep, value = arg.partition("=")
        if not sep:
            print(f"Argumen '{arg}' tidak dikenali.")
            continue
        if key == "sequence":
            sequence = value
        elif key == "n_receptor":
            n_receptor = int(value)
        elif key == "n_adjuvant":
            n_adjuvant = int(value)
        elif key == "blast_activate":
            blast_activate = value.lower() == "true"
        elif key == "llm_url":
            llm_url = value
        elif key == "alphafold_url":
            alphafold_url = value
        else:
            print(f"Argumen '{key}' tidak dikenali.")

    # Each run writes into its own timestamped folder under result/.
    target_folder = os.path.join("result", "result_" + generate_filename_with_timestamp_and_random())
    create_folder(target_folder)
    reva = ReVa(sequence, get_base_path(),
                os.path.join(target_folder, generate_filename_with_timestamp_and_random()),
                n_receptor, n_adjuvant, blast_activate, llm_url, alphafold_url)
    qreva = QReVa(sequence, get_base_path(),
                  os.path.join(target_folder, generate_filename_with_timestamp_and_random("quantum")),
                  n_receptor, n_adjuvant, blast_activate=blast_activate,
                  qibm_api="", backend_type="ibmq_qasm_simulator",
                  llm_url=llm_url, alphafold_url=alphafold_url)
    res1, res2, res3 = reva.predict()
    qres1, qres2, qres3 = qreva.predict()
|
|
|
# Entry-point guard: run the full pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":

    main()