NiniCat committed on
Commit
e93c659
1 Parent(s): 666f677

Add application file

Browse files
Files changed (1) hide show
  1. app.py +207 -0
app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tiger
3
+ import pandas as pd
4
+ import streamlit as st
5
+ from pathlib import Path
6
+
7
# Display labels for the transcript entry selectbox, keyed by entry method.
ENTRY_METHODS = {
    'manual': 'Manual entry of single transcript',
    'fasta': "Fasta file upload (supports multiple transcripts if they have unique ID's)",
}
11
+
12
+
13
@st.cache_data
def convert_df(df):
    """Serialize ``df`` to CSV text and return it as UTF-8 encoded bytes.

    Wrapped in ``st.cache_data`` so the conversion is not recomputed on every
    script rerun.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
17
+
18
+
19
def mode_change_callback():
    """Keep the off-target checkbox state consistent with the selected run mode.

    For the 'all' and 'titration' run modes the checkbox is unchecked and
    disabled; for every other mode it is re-enabled (its checked state is left
    untouched).
    """
    state = st.session_state
    modes_without_off_target = {tiger.RUN_MODES['all'], tiger.RUN_MODES['titration']}  # TODO: support titration

    if state.mode not in modes_without_off_target:
        state.disable_off_target_checkbox = False
        return

    state.check_off_targets = False
    state.disable_off_target_checkbox = True
25
+
26
+
27
def progress_update(update_text, percent_complete):
    """Show a status message and a progress bar in the ``progress`` placeholder.

    ``progress`` is the module-level placeholder created by the script body.
    ``percent_complete`` is divided by 100 before being handed to
    ``st.progress``.
    """
    status_area = progress.container()
    with status_area:
        st.write(update_text)
        fraction_complete = percent_complete / 100
        st.progress(fraction_complete)
31
+
32
+
33
def initiate_run():
    """Validate the submitted transcript input and queue it for prediction.

    Used as the ``on_click`` callback of the submit button. It clears the
    previous results and error message in ``st.session_state``, builds a
    transcript DataFrame from either the manual text entry or the uploaded
    fasta file, normalises the sequences, and then either records a validation
    message in ``st.session_state.input_error`` or stores the DataFrame in
    ``st.session_state.transcripts`` for the script body to pick up.
    """
    import tempfile  # local import: only needed to stage the uploaded file

    # initialize state variables
    st.session_state.transcripts = None
    st.session_state.input_error = None
    st.session_state.on_target = None
    st.session_state.titration = None
    st.session_state.off_target = None

    # initialize transcript DataFrame
    transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])

    # manual entry
    if st.session_state.entry_method == ENTRY_METHODS['manual']:
        transcripts = pd.DataFrame({
            tiger.ID_COL: ['ManualEntry'],
            tiger.SEQ_COL: [st.session_state.manual_entry]
        }).set_index(tiger.ID_COL)

    # fasta file upload
    elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
        upload = st.session_state.fasta_entry
        if upload is not None:
            # The file name is supplied by the client, so never use it as a path
            # relative to the working directory: an upload named like one of the
            # app's own files would be overwritten and then deleted, and two
            # sessions uploading the same name would clobber each other. Keep
            # only the base name and stage it in a private temporary directory,
            # which is removed even if loading raises.
            staged_name = os.path.basename(upload.name)
            if staged_name in ('', '.', '..'):
                staged_name = 'upload.fasta'
            with tempfile.TemporaryDirectory() as staging_dir:
                fasta_path = os.path.join(staging_dir, staged_name)
                with open(fasta_path, 'w') as f:
                    f.write(upload.getvalue().decode('utf-8'))
                # uniqueness is not enforced here; duplicates are reported below
                transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)

    # convert to upper case as used by tokenizer (and map RNA 'U' to 'T')
    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))

    # ensure all transcripts have unique identifiers
    if transcripts.index.has_duplicates:
        st.session_state.input_error = "Duplicate transcript ID's detected in fasta file"

    # ensure all transcripts only contain characters known to tiger.NUCLEOTIDE_TOKENS
    elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
        st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'

    # ensure all transcripts satisfy length requirements
    elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
        st.session_state.input_error = 'Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN)

    # run model if we have any transcripts
    elif len(transcripts) > 0:
        st.session_state.transcripts = transcripts
79
+
80
+
81
if __name__ == '__main__':

    state = st.session_state

    # app initialization: seed the session keys on the first run only
    if 'mode' not in state:
        state.mode = tiger.RUN_MODES['all']
        state.disable_off_target_checkbox = True
    if 'entry_method' not in state:
        state.entry_method = ENTRY_METHODS['manual']
    for state_key in ('transcripts', 'input_error', 'on_target', 'titration', 'off_target'):
        if state_key not in state:
            state[state_key] = None

    # every input widget is disabled while submitted transcripts are waiting to be processed
    inputs_locked = state.transcripts is not None

    # title and documentation
    st.markdown(Path('tiger.md').read_text(), unsafe_allow_html=True)
    st.divider()

    # mode selection
    mode_column, off_target_column = st.columns([0.65, 0.35])
    with mode_column:
        st.radio(
            label='What do you want to predict?',
            options=tuple(tiger.RUN_MODES.values()),
            key='mode',
            on_change=mode_change_callback,
            disabled=inputs_locked,
        )
    with off_target_column:
        st.checkbox(
            label='Find off-target effects (slow)',
            key='check_off_targets',
            disabled=state.disable_off_target_checkbox or inputs_locked
        )

    # transcript entry
    st.selectbox(
        label='How would you like to provide transcript(s) of interest?',
        options=ENTRY_METHODS.values(),
        key='entry_method',
        disabled=inputs_locked
    )
    if state.entry_method == ENTRY_METHODS['manual']:
        st.text_input(
            label='Enter a target transcript:',
            key='manual_entry',
            placeholder='Upper or lower case',
            disabled=inputs_locked
        )
    elif state.entry_method == ENTRY_METHODS['fasta']:
        st.file_uploader(
            label='Upload a fasta file:',
            key='fasta_entry',
            disabled=inputs_locked
        )

    # let's go!
    st.button(label='Get predictions!', on_click=initiate_run, disabled=inputs_locked)
    progress = st.empty()  # placeholder written to by progress_update

    # input error
    error = st.empty()
    if state.input_error is None:
        error.empty()
    else:
        error.error(state.input_error, icon="🚨")

    # on-target and titration results share the same layout: a table plus a CSV download
    for result_key, heading, download_label, csv_name in (
        ('on_target', 'On-target predictions:', 'Download on-target predictions', 'on_target.csv'),
        ('titration', 'Titration predictions:', 'Download titration predictions', 'titration.csv'),
    ):
        result_slot = st.empty()
        result_frame = state[result_key]
        if result_frame is None:
            result_slot.empty()
            continue
        with result_slot.container():
            st.write(heading, result_frame)
            st.download_button(
                label=download_label,
                data=convert_df(result_frame),
                file_name=csv_name,
                mime='text/csv'
            )

    # off-target results (an empty result gets a message instead of a table)
    off_target_results = st.empty()
    if state.off_target is None:
        off_target_results.empty()
    else:
        with off_target_results.container():
            if len(state.off_target) > 0:
                st.write('Off-target predictions:', state.off_target)
                st.download_button(
                    label='Download off-target predictions',
                    data=convert_df(state.off_target),
                    file_name='off_target.csv',
                    mime='text/csv'
                )
            else:
                st.write('We did not find any off-target effects!')

    # keep trying to run model until we clear inputs (streamlit UI changes can induce race-condition reruns)
    if state.transcripts is not None:
        # the radio stores the display label; tiger_exhibit is given the matching RUN_MODES key
        mode_key_by_label = {label: mode_key for mode_key, label in tiger.RUN_MODES.items()}
        predictions = tiger.tiger_exhibit(
            transcripts=state.transcripts,
            mode=mode_key_by_label[state.mode],
            check_off_targets=state.check_off_targets,
            status_update_fn=progress_update
        )
        state.on_target, state.titration, state.off_target = predictions
        state.transcripts = None
        st.experimental_rerun()