Spaces:
Build error
Build error
PascalNotin
committed on
Commit
•
b07be69
1
Parent(s):
5d3f7a9
Made visual enhancements
Browse files
app.py
CHANGED
@@ -44,7 +44,7 @@ def create_scoring_matrix_visual(scores,sequence,AA_vocab=AA_vocab,mutation_rang
|
|
44 |
scores_dict = {}
|
45 |
valid_mutant_set=set(scores.mutant)
|
46 |
if mutation_range_start is None: mutation_range_start=1
|
47 |
-
if mutation_range_end is None:
|
48 |
for target_AA in list(AA_vocab):
|
49 |
for position in range(mutation_range_start,mutation_range_end+1):
|
50 |
mutant = sequence[position-1]+str(position)+target_AA
|
@@ -109,16 +109,17 @@ def score_and_create_matrix_all_singles(sequence,mutation_range_start=None,mutat
|
|
109 |
|
110 |
title = "Interactive in silico directed evolution with Tranception"
|
111 |
description = "Perform in silico directed evolution with Tranception to iteratively improve the fitness of a starting protein sequence one mutation at a time. At each step, the Tranception model computes the log likelihood ratios of all possible single amino acid substitution Vs the starting sequence, and outputs a fitness heatmap and recommendations to guide the selection of the mutation to apply. Note: The current version does not currently leverage homologs retrieval at inference time to boost fitness prediction performance."
|
112 |
-
article = "<p style='text-align:
|
|
|
|
|
113 |
examples=[
|
114 |
-
['
|
115 |
-
['
|
116 |
-
['
|
117 |
-
['P53_HUMAN: MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD']
|
118 |
]
|
119 |
|
120 |
model_size_selection = gr.Radio(label="Tranception model size (larger models are more accurate but are slower at inference)", choices=["Small","Medium","Large"], value="Small")
|
121 |
-
protein_sequence_input = gr.Textbox(lines=1, label="Input protein sequence (
|
122 |
mutation_range_start = gr.Number(label="Start of mutation range (min value = 1)",value=1,precision=0)
|
123 |
mutation_range_end = gr.Number(label="End of mutation range (leave empty for full length)",value=10,precision=0)
|
124 |
scoring_mirror = gr.Checkbox(label="Score protein from both directions (leads to more robust fitness predictions, but doubles inference time)")
|
@@ -130,10 +131,10 @@ output_recommendations = gr.Textbox(label="Mutation recommendations")
|
|
130 |
gr.Interface(
|
131 |
fn=score_and_create_matrix_all_singles,
|
132 |
inputs=[protein_sequence_input,mutation_range_start,mutation_range_end,model_size_selection,scoring_mirror],
|
133 |
-
outputs=[
|
134 |
title=title,
|
135 |
description=description,
|
136 |
article=article,
|
137 |
-
|
138 |
allow_flagging="never"
|
139 |
).launch(debug=True)
|
|
|
44 |
scores_dict = {}
|
45 |
valid_mutant_set=set(scores.mutant)
|
46 |
if mutation_range_start is None: mutation_range_start=1
|
47 |
+
if mutation_range_end is None: mutation_range_end=len(sequence)
|
48 |
for target_AA in list(AA_vocab):
|
49 |
for position in range(mutation_range_start,mutation_range_end+1):
|
50 |
mutant = sequence[position-1]+str(position)+target_AA
|
|
|
109 |
|
110 |
title = "Interactive in silico directed evolution with Tranception"
|
111 |
description = "Perform in silico directed evolution with Tranception to iteratively improve the fitness of a starting protein sequence one mutation at a time. At each step, the Tranception model computes the log likelihood ratios of all possible single amino acid substitution Vs the starting sequence, and outputs a fitness heatmap and recommendations to guide the selection of the mutation to apply. Note: The current version does not currently leverage homologs retrieval at inference time to boost fitness prediction performance."
|
112 |
+
article = "<p style='text-align: left'>**Tranception: Protein Fitness Prediction with Autoregressive Transformers and Inference-time Retrieval**</p>"
|
113 |
+
article += "<br><p style='text-align: left'> Pascal Notin, Mafalda Dias, Jonathan Frazer, Javier Marchena-Hurtado, Aidan N. Gomez, Debora S. Marks<sup>*</sup>, Yarin Gal<sup>*</sup>"
|
114 |
+
article += "<br><p style='text-align: left'> <a href='https://proceedings.mlr.press/v162/notin22a.html' target='_blank'>Paper</a> *** <a href='https://github.com/OATML-Markslab/Tranception' target='_blank'>Code</a> </p>"
|
115 |
examples=[
|
116 |
+
['ADRB2_HUMAN --> MGQPGNGSAFLLAPNGSHAPDHDVTQERDEVWVVGMGIVMSLIVLAIVFGNVLVITAIAKFERLQTVTNYFITSLACADLVMGLAVVPFGAAHILMKMWTFGNFWCEFWTSIDVLCVTASIETLCVIAVDRYFAITSPFKYQSLLTKNKARVIILMVWIVSGLTSFLPIQMHWYRATHQEAINCYANETCCDFFTNQAYAIASSIVSFYVPLVIMVFVYSRVFQEAKRQLQKIDKSEGRFHVQNLSQVEQDGRTGHGLRRSSKFCLKEHKALKTLGIIMGTFTLCWLPFFIVNIVHVIQDNLIRKEVYILLNWIGYVNSGFNPLIYCRSPDFRIAFQELLCLRRSSLKAYGNGYSSNGNTGEQSGYHVEQEKENKLLCEDLPGTEDFVGHQGTVPSDNIDSQGRNCSTNDSLL', 1, 10, "Small", True],
|
117 |
+
['IF1_ECOLI --> MAKEDNIEMQGTVLETLPNTMFRVELENGHVVTAHISGKMRKNYIRILTGDKVTVELTPYDLSKGRIVFRSR', 1, None, "Medium", False],
|
118 |
+
['P53_HUMAN --> MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPRVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD', 5, 10, "Large", False]
|
|
|
119 |
]
|
120 |
|
121 |
model_size_selection = gr.Radio(label="Tranception model size (larger models are more accurate but are slower at inference)", choices=["Small","Medium","Large"], value="Small")
|
122 |
+
protein_sequence_input = gr.Textbox(lines=1, label="Input protein sequence (default = RL40A_YEAST)",value="MQIFVKTLTGKTITLEVESSDTIDNVKSKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGGIIEPSLKALASKYNCDKSVCRKCYARLPPRATNCRKRKCGHTNQLRPKKKLK")
|
123 |
mutation_range_start = gr.Number(label="Start of mutation range (min value = 1)",value=1,precision=0)
|
124 |
mutation_range_end = gr.Number(label="End of mutation range (leave empty for full length)",value=10,precision=0)
|
125 |
scoring_mirror = gr.Checkbox(label="Score protein from both directions (leads to more robust fitness predictions, but doubles inference time)")
|
|
|
131 |
gr.Interface(
|
132 |
fn=score_and_create_matrix_all_singles,
|
133 |
inputs=[protein_sequence_input,mutation_range_start,mutation_range_end,model_size_selection,scoring_mirror],
|
134 |
+
outputs=[output_plot,output_recommendations],
|
135 |
title=title,
|
136 |
description=description,
|
137 |
article=article,
|
138 |
+
examples=examples,
|
139 |
allow_flagging="never"
|
140 |
).launch(debug=True)
|