| from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer |
| from transformers import AutoModelForMaskedLM |
| import numpy as np |
| from scoring.functions.binding import BindingAffinity |
| from scoring.functions.permeability import Permeability |
| from scoring.functions.solubility import Solubility |
| from scoring.functions.hemolysis import Hemolysis |
| from scoring.functions.nonfouling import Nonfouling |
|
|
# Root of the user's checkout; tokenizer vocab/split files are resolved
# relative to this path (see ScoringFunctions.__init__). Placeholder —
# must be edited to the local install location before running.
base_path = '/path/to/your/home'
|
|
class ScoringFunctions:
    def __init__(self, score_func_names=None, prot_seqs=None, device=None):
        """
        Build a set of scoring functions and evaluate them on generated sequences.

        Args:
            score_func_names: list of scoring-function names to evaluate; any of
                'binding_affinity1', 'binding_affinity2', 'permeability',
                'nonfouling', 'solubility', 'hemolysis'. Defaults to an empty
                list (no scores computed).
            prot_seqs: list of target-protein sequences. The first sequence
                backs 'binding_affinity1', the second backs 'binding_affinity2'.
                Defaults to an empty list.
            device: torch device string/object the embedding model and the
                individual predictors are moved to.
        """
        # Shared PeptideCLM backbone used by the embedding-based predictors.
        emb_model = AutoModelForMaskedLM.from_pretrained('aaronfeller/PeptideCLM-23M-all').roformer.to(device).eval()
        tokenizer = SMILES_SPE_Tokenizer(f'{base_path}/TR2-D2/tr2d2-pep/tokenizer/new_vocab.txt',
                                         f'{base_path}/TR2-D2/tr2d2-pep/tokenizer/new_splits.txt')
        prot_seqs = prot_seqs if prot_seqs is not None else []

        # Normalize BEFORE the membership tests below; the original code
        # tested the raw argument and raised TypeError when it was None.
        self.score_func_names = score_func_names if score_func_names is not None else []

        self.target_protein = prot_seqs

        # Instantiate a binding predictor only when it was requested AND a
        # protein sequence is available for it. Checking each independently
        # (rather than an if/elif over exact list lengths) also covers e.g.
        # 'binding_affinity1' requested with two proteins supplied.
        binding_affinity1 = None
        binding_affinity2 = None
        if ('binding_affinity1' in self.score_func_names) and len(prot_seqs) >= 1:
            binding_affinity1 = BindingAffinity(prot_seqs[0], tokenizer=tokenizer, base_path=base_path, device=device)
        if ('binding_affinity2' in self.score_func_names) and len(prot_seqs) >= 2:
            binding_affinity2 = BindingAffinity(prot_seqs[1], tokenizer=tokenizer, base_path=base_path, device=device)

        permeability = Permeability(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)
        sol = Solubility(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)
        nonfouling = Nonfouling(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)
        hemo = Hemolysis(tokenizer=tokenizer, base_path=base_path, device=device, emb_model=emb_model)

        # Name -> callable registry; forward() looks requested names up here.
        self.all_funcs = {'binding_affinity1': binding_affinity1,
                          'binding_affinity2': binding_affinity2,
                          'permeability': permeability,
                          'nonfouling': nonfouling,
                          'solubility': sol,
                          'hemolysis': hemo
                          }

    def forward(self, input_seqs):
        """
        Score a batch of sequences with every configured scoring function.

        Args:
            input_seqs: list of generated sequences to score.

        Returns:
            np.ndarray of shape (len(input_seqs), len(self.score_func_names)),
            dtype float32; column order follows self.score_func_names.
        """
        scores = []
        for name in self.score_func_names:
            func = self.all_funcs[name]
            if func is None:
                # Requested score has no backing predictor (e.g. a binding
                # score without a protein sequence) — fail loudly and early
                # instead of the opaque "'NoneType' is not callable".
                raise ValueError(f"Scoring function '{name}' was requested but is not configured")
            scores.append(func(input_seqs=input_seqs))

        # Transpose from (n_funcs, n_seqs) to (n_seqs, n_funcs).
        return np.float32(scores).T

    def __call__(self, input_seqs: list):
        """Alias for forward() so the object can be used as a callable."""
        return self.forward(input_seqs)