syedislamuddin commited on
Commit
4a556a5
·
1 Parent(s): a136848

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +883 -0
app.py ADDED
@@ -0,0 +1,883 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from turtle import shape
2
+ import streamlit as st
3
+ #from st_keyup import st_keyup
4
+ import pandas as pd
5
+ import numpy as np
6
+ from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
7
+
8
+ import os
9
+
10
# Page-level configuration: use the wide layout for the whole app.
st.set_page_config(layout="wide")

# CSS rule that sets an x-large font size on the `.streamlit-expanderHeader` class.
_EXPANDER_HEADER_CSS = """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
"""
st.markdown(_EXPANDER_HEADER_CSS, unsafe_allow_html=True)

# Shared opening tags for the inline HTML notices rendered via st.markdown.
_RED_NOTE_OPEN = '<p style="font-family:sans-serif; color:Red; font-size: 18px;">'
_GREEN_NOTE_OPEN = '<p style="font-family:sans-serif; color:Green; font-size: 16px;">'

# Warning notice: only one guide of a pair was located.
caution = (
    _RED_NOTE_OPEN
    + 'Please note that Only one Guide (from pair) is found. '
    + 'Please see guides not found section for other guide</p>'
)

# Warning notice: explains how the mutated-guide table columns are laid out.
caution1 = (
    _RED_NOTE_OPEN
    + 'Please note that Each mutated guide is reported as a sepearte line. '
    + 'sgID_1/2, sgRNA_1/2, chr_sgRNA_1/2 and position_sgRNA_1/2 '
    + 'represent values for reference/mutated guide</p>'
)

# Warning notice: a selection must be made before ticking a result checkbox.
caution2 = (
    _RED_NOTE_OPEN
    + 'Please Select a single/multiple guides and then select Check Box A, B or C '
    + 'Otherwise code will through error</p>'
)

# Usage hint displayed above the interactive tables.
table_edit = (
    _GREEN_NOTE_OPEN
    + 'About Table: Please note that table can be '
    + '<b>sorted by clicking on any column</b> and '
    + '<b>Multiple rows can be selected</b> '
    + '(by clicking check box in first column) to save only those rows.</p>'
)
25
+
26
def transform(df,str):
    """Let the user pick a subset of columns and return *df* restricted to them.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are offered in the multiselect widget.
    str : str
        Label shown on the multiselect widget. NOTE(review): the name shadows
        the ``str`` builtin inside this function; it is kept unchanged because
        it is part of the function's signature.

    Returns
    -------
    pandas.DataFrame
        ``df`` indexed by the list of columns returned by the widget. All
        columns are pre-selected by default.
    """
    available_columns = df.columns.tolist()
    default_columns = df.columns.tolist()
    chosen_columns = st.multiselect(str, available_columns, default_columns)
    return df[chosen_columns]
35
+
36
def convert_df(df):
    """Serialize *df* to CSV (index column included) and return UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
38
def convert_df1(df):
    """Serialize *df* to CSV without the index column and return UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
40
+
41
+
42
# CSS kept in a string: hides the first header cell and every row-header
# cell (<th> inside <tbody>) of rendered HTML tables.
hide_table_row_index = "\n".join(
    (
        "",
        "<style>",
        "thead tr th:first-child {display:none}",
        "tbody th {display:none}",
        "</style>",
        "",
    )
)

# Inject the CSS through a markdown element with raw HTML enabled.
st.markdown(hide_table_row_index, unsafe_allow_html=True)
52
+
53
+
54
+ #########TABLE DISPLAY
55
def tbl_disp(dat,var,ref,flg=1):
    """Show *dat* in an interactive AgGrid table with CSV download buttons.

    Parameters
    ----------
    dat : pandas.DataFrame
        Table to display. NOTE: its index is reset **in place**
        (``reset_index(drop=True, inplace=True)``), so the caller's frame is
        modified.
    var, ref : str
        Joined as ``var + '_' + ref + '.csv'`` to form the download file name
        used by both download buttons.
    flg : int, default 1
        When equal to 1 a "Download Full Table" button is rendered above the
        grid; any other value suppresses it.

    Returns
    -------
    pandas.DataFrame or None
        A frame built from the rows currently selected in the grid, or
        ``None`` when nothing is selected. The returned frame still contains
        its first column, which is dropped only for the on-screen preview and
        the selected-rows CSV (presumably grid row metadata added by
        st_aggrid -- confirm against the installed version).
    """
    dat.reset_index(drop=True, inplace=True)
    download_name = var+'_'+ref+'.csv'

    if flg==1:
        # Serialize the full table only when the button is actually shown;
        # this avoids a wasted CSV conversion on every rerun otherwise.
        st.download_button(
            label="Download Full Table as CSV file",
            data=convert_df(dat),
            file_name=download_name,
            mime='text/csv',
        )

    # Grid configuration: no pagination, pivot/value/row-group enabled on all
    # columns, multi-row selection through checkboxes, and the side bar.
    gb = GridOptionsBuilder.from_dataframe(dat)
    gb.configure_pagination(enabled=False)
    gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    gb.configure_selection(selection_mode="multiple", use_checkbox=True)
    gb.configure_side_bar()
    gridOptions = gb.build()

    grid_response = AgGrid(
        dat,
        height=200,
        gridOptions=gridOptions,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    selected = grid_response['selected_rows']
    # Explicit emptiness test: works for a list of row dicts as well as for a
    # DataFrame/None (a bare truth test on a DataFrame raises ValueError).
    if selected is None or len(selected) == 0:
        return None

    st.write('Selected rows')
    dfs = pd.DataFrame(selected)
    # Skip the first column for display and export.
    visible = dfs[dfs.columns[1:dfs.shape[1]]]
    st.dataframe(visible)

    st.download_button(
        label="Download data as CSV",
        data=convert_df1(visible),
        file_name=download_name,
        mime='text/csv',
    )
    return dfs
110
+
111
+
112
+
113
def assemble_tbl(t):
    """Pair consecutive rows of *t* into one row per sgRNA pair.

    Rows ``0, 2, 4, ...`` supply the ``*_1`` columns and rows ``1, 3, 5, ...``
    supply the ``*_2`` columns. Only the first four columns of *t* are used
    and they are taken **by position** as (guide id, guide sequence,
    chromosome, position).

    Parameters
    ----------
    t : pandas.DataFrame
        Table with an even number of rows and at least four columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``sgID_1, sgRNA_1, chr_sgRNA_1, position_sgRNA_1, sgID_2,
        sgRNA_2, chr_sgRNA_2, position_sgRNA_2, sgID_1_2`` with a fresh
        0..n-1 index. Both guide sequences are cut to their first 20
        characters and ``sgID_1_2`` is ``sgID_1 + "|" + sgID_2``.

    Raises
    ------
    ValueError
        If *t* has an odd number of rows (the last row would have no partner).
    """
    out_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1',
                'sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']

    n_rows = t.shape[0]
    if n_rows == 0:
        return pd.DataFrame(columns=out_cols)
    if n_rows % 2 != 0:
        raise ValueError(
            'assemble_tbl expects an even number of rows, got %d' % n_rows
        )

    # Even-positioned rows are the first guide of each pair, odd-positioned
    # rows the second one; slicing replaces the old row-by-row
    # DataFrame.append loop (append was removed in pandas 2.0).
    first = t.iloc[0::2, :4].reset_index(drop=True)
    first.columns = out_cols[0:4]
    second = t.iloc[1::2, :4].reset_index(drop=True)
    second.columns = out_cols[4:8]

    paired = pd.concat([first, second], axis=1)
    paired['sgRNA_1'] = paired['sgRNA_1'].str.slice(0, 20)
    paired['sgRNA_2'] = paired['sgRNA_2'].str.slice(0, 20)
    # Combined id of the pair: first id, then the *second* id (the previous
    # code joined sgID_1 with itself).
    paired['sgID_1_2'] = paired['sgID_1'] + "|" + paired['sgID_2']
    return paired[out_cols]
129
+
130
def _select_by_gene(frame, gene_ids):
    """Concatenate, in *gene_ids* order, the rows of *frame* whose 'gene' equals each id."""
    if len(gene_ids) == 0:
        # pd.concat([]) raises; return an empty frame with the same columns.
        return frame.iloc[0:0]
    return pd.concat([frame[frame['gene'] == gene_id] for gene_id in gene_ids])


def _pad_unpaired_rows(t):
    """Return *t* with an even row count by duplicating rows that have no partner.

    Two consecutive rows are kept as a pair when they agree on 'gene', 'chr'
    and 'position'; any other row is emitted twice so it pairs with itself.
    """
    pieces = []
    n_rows = t.shape[0]
    i = 0
    while i < n_rows:
        current = t.iloc[i]
        following = t.iloc[i + 1] if i < n_rows - 1 else None
        if (
            following is not None
            and current['gene'] == following['gene']
            and current['chr'] == following['chr']
            and current['position'] == following['position']
        ):
            pieces.append(t.iloc[[i]])
            pieces.append(t.iloc[[i + 1]])
            i += 2
        else:
            pieces.append(t.iloc[[i]])
            pieces.append(t.iloc[[i]])
            i += 1
    return pd.concat(pieces, ignore_index=True)


def get_lists(ref_list,list_found_ref,list_notfound_ref):
    """Collect matched, mutated and not-found search results for reference guides.

    Parameters
    ----------
    ref_list : pandas.DataFrame
        Reference guide pairs; its ``gene`` column holds pair ids of the form
        ``"<id_1>|<id_2>"`` which are split on ``'|'``.
    list_found_ref : pandas.DataFrame
        Search hits; the columns ``gene``, ``chr``, ``position`` and
        ``num_mismatch`` are read here.
    list_notfound_ref : pandas.DataFrame
        Guides without any hit; filtered on its ``gene`` column.

    Returns
    -------
    tuple
        ``(dft, dft_mut, notfound, match, mutated)`` where

        * ``dft`` -- perfect matches paired by ``assemble_tbl`` (an empty
          frame with the raw hit columns when there is no perfect match),
        * ``dft_mut`` -- mismatching hits reshaped by ``get_mutated_res``
          (an empty frame with the paired columns when there are none),
        * ``notfound`` -- rows of *list_notfound_ref* for **all** ids of
          *ref_list* (previously only the first pair was checked),
        * ``match`` -- hits with ``num_mismatch == 0`` whose ``chr`` contains
          ``'chr'``,
        * ``mutated`` -- hits with ``num_mismatch > 0`` sorted by
          ``position`` whose ``chr`` contains ``'chr'``.

    Raises
    ------
    IndexError
        If a pair id in ``ref_list.gene`` contains no ``'|'``.
    """
    raw_cols = ['gene','ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']
    paired_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1',
                   'sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']

    # Flatten every "<id_1>|<id_2>" pair id into its two single-guide ids.
    a_ref = []
    for pair_id in ref_list.gene.values:
        halves = pair_id.split('|')
        a_ref.append(halves[0])
        a_ref.append(halves[1])

    list_concatenated_found_ref = _select_by_gene(list_found_ref, a_ref)

    # Perfect matches; the 'chr' filter drops alternate-loci records.
    perfect = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch == 0]
    list_concatenated_match_ref = perfect[perfect['chr'].str.contains('chr')]

    # One output row per pair of perfect matches.
    dft = pd.DataFrame(columns=raw_cols)
    if list_concatenated_match_ref.shape[0] > 0:
        t = list_concatenated_match_ref.reset_index(drop=True)
        if t.shape[0] % 2 != 0:
            # Odd count (including a single hit): make every row part of a pair.
            t = _pad_unpaired_rows(t)
        dft = assemble_tbl(t)

    # Hits with mismatches, ordered by position; same alternate-loci filter.
    mismatching = list_concatenated_found_ref[list_concatenated_found_ref.num_mismatch > 0]
    mismatching = mismatching.sort_values('position')
    list_concatenated_mutated_ref = mismatching[mismatching['chr'].str.contains('chr')]

    dft_mut = pd.DataFrame(columns=paired_cols)
    if list_concatenated_mutated_ref.shape[0] > 0:
        dft_mut = get_mutated_res(list_concatenated_mutated_ref)

    # Not-found guides for every requested id, not just the first pair.
    list_concatenated_notfound_ref = _select_by_gene(list_notfound_ref, a_ref)

    return dft, dft_mut,list_concatenated_notfound_ref,list_concatenated_match_ref,list_concatenated_mutated_ref
208
+ ###########
209
+
210
def get_mutated_res(list_concatenated_mutated_ref):
    """Reshape mismatching hits into the paired reference/mutated guide layout.

    Each input row yields one output row. Columns are read **by position**:
    0 = guide id, 1 = reference guide sequence, 2 = chromosome, 3 = position,
    4 = mutated guide sequence.

    Parameters
    ----------
    list_concatenated_mutated_ref : pandas.DataFrame
        Hits with at least five columns in the order described above.

    Returns
    -------
    pandas.DataFrame
        Columns ``sgID_1, sgRNA_1, chr_sgRNA_1, position_sgRNA_1, sgID_2,
        sgRNA_2, chr_sgRNA_2, position_sgRNA_2, sgID_1_2`` with a fresh
        0..n-1 index. The ``*_1`` columns describe the reference guide and
        the ``*_2`` columns the mutated guide; id, chromosome and position
        are identical on both sides. Both sequences are rewritten as ``'G'``
        followed by characters 1..19 of the source sequence, i.e. the first
        base is replaced by G. ``sgID_1_2`` is the id joined with itself by
        ``'|'``.
    """
    paired_cols = ['sgID_1','sgRNA_1','chr_sgRNA_1','position_sgRNA_1',
                   'sgID_2','sgRNA_2','chr_sgRNA_2','position_sgRNA_2', 'sgID_1_2']

    t = list_concatenated_mutated_ref.reset_index(drop=True)
    if t.shape[0] == 0:
        return pd.DataFrame(columns=paired_cols)

    guide_id = t.iloc[:, 0]
    chromosome = t.iloc[:, 2]
    position = t.iloc[:, 3]

    # Built in one vectorised step; the former per-row DataFrame.append loop
    # no longer runs on pandas >= 2.0, where append was removed.
    dft_mut = pd.DataFrame({
        'sgID_1': guide_id,
        # Force a leading G on the reference guide.
        'sgRNA_1': 'G' + t.iloc[:, 1].str.slice(1, 20),
        'chr_sgRNA_1': chromosome,
        'position_sgRNA_1': position,
        'sgID_2': guide_id,
        # Same leading-G rewrite for the mutated guide.
        'sgRNA_2': 'G' + t.iloc[:, 4].str.slice(1, 20),
        'chr_sgRNA_2': chromosome,
        'position_sgRNA_2': position,
        'sgID_1_2': guide_id + "|" + guide_id,
    })
    return dft_mut[paired_cols]
241
+
242
+ #########
243
+
244
+ #def get_notfound():
245
+
246
+
247
# Every input CSV is read from a "data" folder under the current working directory.
cwd=os.getcwd()+'/'+'data/'


def _read_table(file_name):
    """Read one CSV from the data folder (index_col=False)."""
    return pd.read_csv(cwd+file_name,index_col=False)


def _read_found(file_name, trim_chr=False):
    """Read a "found" result table and normalise it.

    With ``trim_chr`` the 'chr' values are cut at the first space. The
    misspelled 'strnad' column, if present, is renamed to 'strand'.
    """
    found = _read_table(file_name)
    if trim_chr:
        found['chr'] = [x.split(' ')[0] for x in found['chr']]
    found.rename(columns = {'strnad':'strand'}, inplace = True)
    return found


# Guide libraries for the three sets.
listA = _read_table("guides_a_new.csv")
listB = _read_table("guides_b_new.csv")
listC = _read_table("guides_c_new.csv")

# Distinct gene names per set ...
variantsa1=listA['gene'].unique()
variantsb1=listB['gene'].unique()
variantsc1=listC['gene'].unique()

# ... merged, de-duplicated and sorted.
con = np.concatenate((variantsa1, variantsb1,variantsc1))
variants_s=sorted(np.unique(con))

# Set A: GRCh38 ("ref") and long-read ("LR"/"lr") search results.
listA_found_ref = _read_found("seta_found_ref1.csv", trim_chr=True)
listA_notfound_ref = _read_table("seta_notfound_ref1.csv")
listA_found_lr = _read_found("seta_found_LR1.csv")
listA_notfound_lr = _read_table("seta_notfound_LR1.csv")

# Set B: GRCh38 and long-read search results.
listB_found_ref = _read_found("setb_found_ref1.csv", trim_chr=True)
listB_notfound_ref = _read_table("setb_notfound_ref1.csv")
listB_found_lr = _read_found("setb_found_LR1.csv")
listB_notfound_lr = _read_table("setb_notfound_LR1.csv")

# Set C: GRCh38 and long-read search results.
listC_found_ref = _read_found("setc_found_ref1.csv", trim_chr=True)
listC_notfound_ref = _read_table("setc_notfound_ref1.csv")
listC_found_lr = _read_found("setc_found_LR1.csv")
listC_notfound_lr = _read_table("setc_notfound_LR1.csv")
304
+
305
+
306
+
307
+ st.title('Long Read Guides Search')
308
+ #st.markdown('**Please select an option from the sidebar**')
309
+
310
+ #st.write(variants)
311
+
312
+
313
+ Calc = st.sidebar.radio(
314
+ "",
315
+ ('ReadME', 'Single Gene','Multiple Genes'))
316
+
317
+
318
+ if Calc == 'ReadME':
319
+ expander = st.expander("How to use this app")
320
+ #st.header('How to use this app')
321
+ expander.markdown('Please select **Single Gene** OR **Multiple Genes** Menue checkbox from the sidebar')
322
+ expander.markdown('Select a Gene (from genes dropdown list) OR Multiple genes (from table)')
323
+ expander.markdown('A table showing all reference gudies from three LISTS will appear in the main panel. **Please not some of the genes (for example A1BG and GJB7) have multiple guide pairs and all of these are selected.**')
324
+ expander.markdown('To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox')
325
+ expander.markdown('Results are shown as two tables, **Matched** and **Mutated** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files')
326
+ expander.markdown('**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
327
+ expander.markdown('**Mutated** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**')
328
+
329
+ expander1 = st.expander('Introduction')
330
+
331
+ expander1.markdown(
332
+ """ This app helps navigate all probable genomic **miss-matched/Mutations (upto 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J longread data.
333
+ """
334
+ )
335
+ expander1.markdown('Merged bam file was converted to fasta file using following steps:')
336
+ expander1.markdown('- samtools mpileup to generate bcf file')
337
+ expander1.markdown('- bcftools to generate vcf file')
338
+ expander1.markdown('- bcftools consensus to generate fasta file')
339
+ expander1.markdown('A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (upto 2 miss-matched) for each geiven reference guide against GRCh38 and LR fasta references.')
340
+
341
+ elif Calc=='Single Gene':
342
+ #if Calc == 'Selection Menu':
343
+ #ReadMe = st.sidebar.checkbox('ReadME',value=False)
344
+ select_variant = st.sidebar.selectbox(
345
+ "Please select Gene",
346
+ variants_s
347
+ )
348
+ #ref_sgrna=listA[listA['sgID_A']==select_variant][['protospacer_A','protospacer_B']]
349
+ #get all references
350
+
351
+ ref_listA=listA[listA['gene']==select_variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
352
+ ref_listA = ref_listA[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
353
+ ref_listA.columns=['gene','guide_type','protospacer_A','protospacer_B']
354
+
355
+ ref_listB=listB[listB['gene']==select_variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
356
+ ref_listB = ref_listB[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
357
+ ref_listB.columns=['gene','guide_type','protospacer_A','protospacer_B']
358
+
359
+ ref_listC=listC[listC['gene']==select_variant][['guide_type','protospacer_A','protospacer_B','sgID_AB']]
360
+ ref_listC = ref_listC[['sgID_AB','guide_type','protospacer_A','protospacer_B']]
361
+ ref_listC.columns=['gene','guide_type','protospacer_A','protospacer_B']
362
+ listA_concatenated_orig = pd.concat([ref_listA,ref_listB,ref_listC])
363
+
364
+ st.write('**Input** Guides (all 6 from 3 sets)')
365
+ st.markdown(table_edit,unsafe_allow_html=True)
366
+ tbl_disp(listA_concatenated_orig,select_variant,'ref_guides',0)
367
+ #st.table(listA_concatenated_orig)
368
+
369
+ #now search from results for list a
370
+ #st.write(ref_listA)
371
+ ListARes = st.checkbox('Results For SetA',key=1)
372
+ if ListARes:
373
+ if len(ref_listA)>0:
374
+ #st.table(ref_listA)
375
+
376
+ ##########
377
+ res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listA,listA_found_ref,listA_notfound_ref)
378
+ st.write('Selected Reference Guides for **Set A**')
379
+ st.table(ref_listA)
380
+ #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
381
+ if res.shape[0]>0:
382
+ st.write('Matched to **GRCh38** Reference Guides for **Set A**')
383
+ tbl_disp(res,select_variant,'SetA_GRCh38')
384
+ elif res_mut.shape[0]>0:
385
+ st.write('Mutated to **GRCh38** Reference Guides for **Set A**')
386
+ st.markdown(caution1,unsafe_allow_html=True)
387
+ tbl_disp(res_mut,select_variant,'SetA_Mutated_GRCh38')
388
+ if res_notfound.shape[0]>0:
389
+ st.write('**SetA Guides Not Found in GRCh38**')
390
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
391
+ st.table(res_notfound)
392
+ ##########
393
+
394
+
395
+ #For LR
396
+ ##########
397
+ res_lr,res_mut_lr,res_notfound_lr,list_match_lr,list_mutated_lr=get_lists(ref_listA,listA_found_lr,listA_notfound_lr)
398
+ #st.write('Selected Reference Guides for **Set A**')
399
+ #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
400
+ if res_lr.shape[0]>0:
401
+ st.write('Matched to **CHM13** Reference Guides for **Set A**')
402
+ tbl_disp(res_lr,select_variant,'SetA_CHM13')
403
+ elif res_mut_lr.shape[0]>0:
404
+ st.write('Mutated to **CHM13** Reference Guides for **Set A**')
405
+ st.markdown(caution1,unsafe_allow_html=True)
406
+ tbl_disp(res_mut_lr,select_variant,'SetA_Mutated_CHM13')
407
+ if res_notfound_lr.shape[0]>0:
408
+ st.write('**SetA Guides Not Found in CHM13**')
409
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
410
+ st.table(res_notfound_lr)
411
+ ##########
412
+
413
+
414
+ #######
415
+ #NOW MERGE FROM GRCh38 and LR
416
+ merged_mutated_set=pd.merge(list_mutated,list_mutated_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
417
+ merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
418
+ merged_match_set=pd.merge(list_match,list_match_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
419
+ merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
420
+ if merged_match_set.shape[0]>0:
421
+ #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
422
+ st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
423
+ tbl_disp(merged_match_set,select_variant,'SetA_Matched_GRCh38_CHM13',0)
424
+
425
+ #st.table(merged_match_seta)
426
+ elif merged_mutated_set.shape[0]>0:
427
+ #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
428
+ st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
429
+
430
+ tbl_disp(merged_mutated_set,select_variant,'SetA_Mutated_GRCh38_CHM13',0)
431
+
432
+ ########
433
+
434
+ else:
435
+ st.write('**Gene: **'+select_variant+' Not found in listA')
436
+
437
+
438
+ #list B
439
+ ListBRes = st.checkbox('Results For SetB',key=2)
440
+ if ListBRes:
441
+ if len(ref_listB)>0:
442
+ ##########
443
+ res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listB,listB_found_ref,listB_notfound_ref)
444
+ st.write('Selected Reference Guides for **Set B**')
445
+ st.table(ref_listB)
446
+ #tbl_disp(ref_listB,select_variant,'ReferenceGuides',0)
447
+ if res.shape[0]>0:
448
+ st.write('Matched to **GRCh38** Reference Guides for **Set B**')
449
+ tbl_disp(res,select_variant,'SetB_GRCh38')
450
+ elif res_mut.shape[0]>0:
451
+ st.write('Mutated to **GRCh38** Reference Guides for **Set B**')
452
+ st.markdown(caution1,unsafe_allow_html=True)
453
+ tbl_disp(res_mut,select_variant,'SetA_Mutated_GRCh38')
454
+ if res_notfound.shape[0]>0:
455
+ st.write('**SetB Guides Not Found in GRCh38**')
456
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
457
+ st.table(res_notfound)
458
+ ##########
459
+
460
+
461
+ #For LR
462
+ ##########
463
+ res_lr,res_mut_lr,res_notfound_lr,list_match_lr,list_mutated_lr=get_lists(ref_listB,listB_found_lr,listB_notfound_lr)
464
+ #st.write('Selected Reference Guides for **Set A**')
465
+ #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
466
+ if res_lr.shape[0]>0:
467
+ st.write('Matched to **CHM13** Reference Guides for **Set B**')
468
+ tbl_disp(res_lr,select_variant,'SetB_CHM13')
469
+ elif res_mut_lr.shape[0]>0:
470
+ st.write('Mutated to **CHM13** Reference Guides for **Set B**')
471
+ st.markdown(caution1,unsafe_allow_html=True)
472
+ tbl_disp(res_mut_lr,select_variant,'SetB_Mutated_CHM13')
473
+ if res_notfound_lr.shape[0]>0:
474
+ st.write('**SetB Guides Not Found in CHM13**')
475
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
476
+ st.table(res_notfound_lr)
477
+ ##########
478
+
479
+
480
+ #######
481
+ #NOW MERGE FROM GRCh38 and LR
482
+ merged_mutated_set=pd.merge(list_mutated,list_mutated_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
483
+ merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
484
+ merged_match_set=pd.merge(list_match,list_match_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
485
+ merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
486
+ if merged_match_set.shape[0]>0:
487
+ #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
488
+ st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
489
+ tbl_disp(merged_match_set,select_variant,'SetB_Matched_GRCh38_CHM13',0)
490
+
491
+ #st.table(merged_match_seta)
492
+ elif merged_mutated_set.shape[0]>0:
493
+ #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
494
+ st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
495
+
496
+ tbl_disp(merged_mutated_set,select_variant,'SetB_Mutated_GRCh38_CHM13',0)
497
+
498
+ ########
499
+
500
+ else:
501
+ st.write('**Gene: **'+select_variant+' Not found in listB')
502
+
503
+ ### list B
504
+
505
+ #list C
506
+ ListCRes = st.checkbox('Results For SetC',key=3)
507
+ if ListCRes:
508
+ if len(ref_listC)>0:
509
+ ##########
510
+ res,res_mut,res_notfound,list_match,list_mutated=get_lists(ref_listC,listC_found_ref,listC_notfound_ref)
511
+ st.write('Selected Reference Guides for **Set C**')
512
+ st.table(ref_listC)
513
+ #tbl_disp(ref_listC,select_variant,'ReferenceGuides',0)
514
+ if res.shape[0]>0:
515
+ st.write('Matched to **GRCh38** Reference Guides for **Set C**')
516
+ tbl_disp(res,select_variant,'SetC_GRCh38')
517
+ elif res_mut.shape[0]>0:
518
+ st.write('Mutated to **GRCh38** Reference Guides for **Set C**')
519
+ st.markdown(caution1,unsafe_allow_html=True)
520
+ tbl_disp(res_mut,select_variant,'SetC_Mutated_GRCh38')
521
+ if res_notfound.shape[0]>0:
522
+ st.write('**SetC Guides Not Found in GRCh38**')
523
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
524
+ st.table(res_notfound)
525
+ ##########
526
+
527
+
528
+ #For LR
529
+ ##########
530
+ res_lr,res_mut_lr,res_notfound_lr,list_match_lr,list_mutated_lr=get_lists(ref_listC,listC_found_lr,listC_notfound_lr)
531
+ #st.write('Selected Reference Guides for **Set A**')
532
+ #tbl_disp(ref_listA,select_variant,'ReferenceGuides',0)
533
+ if res_lr.shape[0]>0:
534
+ st.write('Matched to **CHM13** Reference Guides for **Set C**')
535
+ tbl_disp(res_lr,select_variant,'SetC_CHM13')
536
+ elif res_mut_lr.shape[0]>0:
537
+ st.write('Mutated to **CHM13** Reference Guides for **Set C**')
538
+ st.markdown(caution1,unsafe_allow_html=True)
539
+ tbl_disp(res_mut_lr,select_variant,'SetC_Mutated_CHM13')
540
+ if res_notfound_lr.shape[0]>0:
541
+ st.write('**SetC Guides Not Found in CHM13**')
542
+ #tbl_disp(dft_notfound,'select_genes','SetA_Notfound_GRCh38')
543
+ st.table(res_notfound_lr)
544
+ ##########
545
+
546
+
547
+ #######
548
+ #NOW MERGE FROM GRCh38 and LR
549
+ merged_mutated_set=pd.merge(list_mutated,list_mutated_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
550
+ merged_mutated_set = merged_mutated_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
551
+ merged_match_set=pd.merge(list_match,list_match_lr, how="outer",on=["gene","ref_guide","chr"],suffixes=["_GRCh38",'_LR'])
552
+ merged_match_set = merged_match_set[['gene','ref_guide','chr','position_GRCh38','position_LR','strand_GRCh38','strand_LR','mutated_guide_GRCh38','mutated_guide_LR','num_mismatch_GRCh38','num_mismatch_LR']]
553
+ if merged_match_set.shape[0]>0:
554
+ #st.write('**Matched** Guides for **Set C** (*Each guide sequence has a trailing NGG*)')
555
+ st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
556
+ tbl_disp(merged_match_set,select_variant,'SetC_Matched_GRCh38_CHM13',0)
557
+
558
+ #st.table(merged_match_seta)
559
+ elif merged_mutated_set.shape[0]>0:
560
+ #st.write('**Missmatched** Guides **Set C** (*Each guide sequence has a trailing NGG*)')
561
+ st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
562
+
563
+ tbl_disp(merged_mutated_set,select_variant,'SetC_Mutated_GRCh38_CHM13',0)
564
+
565
+ ########
566
+
567
+ else:
568
+ st.write('**Gene: **'+select_variant+' Not found in listC')
569
+
570
+
571
+ ### list C
572
+ else:
573
+ select_mode = st.radio(
574
+ "Please select an option",
575
+ ('Select Single/Multiple Genes', 'Select All'))
576
+ #st.write('Please Select A **Single/Multiple/SelectAll** Reference Guides')
577
+ #get_table = pd.DataFrame(columns=['gene','sgID_A','protospacer_A','sgID_B','protospacer_B','sgID_AB'])
578
+ if select_mode=='Select Single/Multiple Genes':
579
+ st.markdown(table_edit,unsafe_allow_html=True)
580
+ get_table=tbl_disp(listA[['gene','sgID_A','protospacer_A','sgID_B','protospacer_B','sgID_AB']],'SetA','ReferenceGuides',0)
581
+
582
+ st.markdown(caution2,unsafe_allow_html=True)
583
+ else:
584
+ st.markdown(table_edit,unsafe_allow_html=True)
585
+ get_table=listA[['gene','sgID_A','protospacer_A','sgID_B','protospacer_B','sgID_AB']]
586
+
587
+ st.markdown(caution2,unsafe_allow_html=True)
588
+
589
+
590
+ #st.write(get_table)
591
+
592
# Results for Set A: for every gene selected above, look up its Set A guide
# pair against GRCh38 and CHM13, show the per-reference tables, then show the
# GRCh38/CHM13 side-by-side merge of the individual guide hits.
ListARes = st.checkbox('Results For SetA', key=30)
if ListARes and get_table is not None:
    variant_set = get_table[['gene']]

    ref_guide_cols = ['gene', 'guide_type', 'protospacer_A', 'protospacer_B']
    pair_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']
    notfound_cols = ['gene', 'ref_guide']
    guide_cols = ['gene', 'ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']

    # Per-reference guide-level hits, kept for the final merge.
    matched_by_ref = {}
    mutated_by_ref = {}

    for ref_name, found_tbl, notfound_tbl in (
            ('GRCh38', listA_found_ref, listA_notfound_ref),
            ('CHM13', listA_found_lr, listA_notfound_lr)):
        # Pieces are collected in lists and concatenated once per reference:
        # DataFrame.append no longer exists in pandas >= 2.0 and repeated
        # appends copy the accumulated frame on every iteration.
        ref_parts = []
        res_parts, res_mut_parts, notfound_parts = [], [], []
        match_parts, mutated_parts = [], []

        for gene in variant_set['gene']:
            ref_listA = listA[listA['gene'] == gene][['sgID_AB', 'guide_type', 'protospacer_A', 'protospacer_B']]
            # The sgID_AB value is passed on under the 'gene' header.
            ref_listA.columns = ref_guide_cols
            res, res_mut, res_notfound, list_match, list_mutated = get_lists(ref_listA, found_tbl, notfound_tbl)

            ref_parts.append(ref_listA)
            for part, bucket in ((res, res_parts),
                                 (res_mut, res_mut_parts),
                                 (res_notfound, notfound_parts),
                                 (list_match, match_parts),
                                 (list_mutated, mutated_parts)):
                if part.shape[0] > 0:
                    bucket.append(part)

        # The empty template goes first so the declared column order is kept
        # and an empty result still has the expected columns.
        dft_a = pd.concat([pd.DataFrame(columns=ref_guide_cols), *ref_parts])
        dft_res = pd.concat([pd.DataFrame(columns=pair_cols), *res_parts])
        dft_res_mut = pd.concat([pd.DataFrame(columns=pair_cols), *res_mut_parts])
        dft_notfound = pd.concat([pd.DataFrame(columns=notfound_cols), *notfound_parts])
        matched_by_ref[ref_name] = pd.concat([pd.DataFrame(columns=guide_cols), *match_parts])
        mutated_by_ref[ref_name] = pd.concat([pd.DataFrame(columns=guide_cols), *mutated_parts])

        if ref_name == 'GRCh38':
            # The selected reference guides are listed once, before any results.
            st.write('Selected Reference Guides for **Set A**')
            tbl_disp(dft_a, 'All', 'ReferenceGuides', 0)

        # NOTE(review): the mutated table is shown only when there are no
        # matched rows at all; confirm this is intended for multi-gene selections.
        if dft_res.shape[0] > 0:
            st.write(f'Matched to **{ref_name}** Reference Guides for **Set A**')
            tbl_disp(dft_res, 'select_genes', f'SetA_{ref_name}')
        elif dft_res_mut.shape[0] > 0:
            st.write(f'Mutated to **{ref_name}** Reference Guides for **Set A**')
            st.markdown(caution1, unsafe_allow_html=True)
            tbl_disp(dft_res_mut, 'select_genes', f'SetA_Mutated_{ref_name}')
        if dft_notfound.shape[0] > 0:
            st.write(f'**SetA Guides Not Found in {ref_name}**')
            st.table(dft_notfound)

    # Merge the GRCh38 and CHM13 (LR) guide-level hits side by side.
    merged_cols = ['gene', 'ref_guide', 'chr', 'position_GRCh38', 'position_LR',
                   'strand_GRCh38', 'strand_LR', 'mutated_guide_GRCh38', 'mutated_guide_LR',
                   'num_mismatch_GRCh38', 'num_mismatch_LR']
    merged_mutated_set = pd.merge(mutated_by_ref['GRCh38'], mutated_by_ref['CHM13'],
                                  how="outer", on=["gene", "ref_guide", "chr"],
                                  suffixes=("_GRCh38", '_LR'))[merged_cols]
    merged_match_set = pd.merge(matched_by_ref['GRCh38'], matched_by_ref['CHM13'],
                                how="outer", on=["gene", "ref_guide", "chr"],
                                suffixes=("_GRCh38", '_LR'))[merged_cols]
    if merged_match_set.shape[0] > 0:
        st.write('**Matched** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_match_set, 'select_genes', 'SetA_Matched_GRCh38_CHM13', 0)
    elif merged_mutated_set.shape[0] > 0:
        st.write('**Mutated** Guides for **Set A** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_mutated_set, 'select_genes', 'SetA_Mutated_GRCh38_CHM13', 0)
elif ListARes:
    # Checkbox ticked but there is no selection to work with.
    st.write("**Please select genes from the above table to begin**")
688
+
689
# Results for Set B: for every gene selected above, look up its Set B guide
# pair against GRCh38 and CHM13, show the per-reference tables, then show the
# GRCh38/CHM13 side-by-side merge of the individual guide hits.
ListBRes = st.checkbox('Results For SetB', key=40)
if ListBRes and get_table is not None:
    variant_set = get_table[['gene']]

    ref_guide_cols = ['gene', 'guide_type', 'protospacer_A', 'protospacer_B']
    pair_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']
    notfound_cols = ['gene', 'ref_guide']
    guide_cols = ['gene', 'ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']

    # Per-reference guide-level hits, kept for the final merge.
    matched_by_ref = {}
    mutated_by_ref = {}

    for ref_name, found_tbl, notfound_tbl in (
            ('GRCh38', listB_found_ref, listB_notfound_ref),
            ('CHM13', listB_found_lr, listB_notfound_lr)):
        # Pieces are collected in lists and concatenated once per reference:
        # DataFrame.append no longer exists in pandas >= 2.0 and repeated
        # appends copy the accumulated frame on every iteration.
        ref_parts = []
        res_parts, res_mut_parts, notfound_parts = [], [], []
        match_parts, mutated_parts = [], []

        for gene in variant_set['gene']:
            ref_listB = listB[listB['gene'] == gene][['sgID_AB', 'guide_type', 'protospacer_A', 'protospacer_B']]
            # The sgID_AB value is passed on under the 'gene' header.
            ref_listB.columns = ref_guide_cols
            res, res_mut, res_notfound, list_match, list_mutated = get_lists(ref_listB, found_tbl, notfound_tbl)

            ref_parts.append(ref_listB)
            for part, bucket in ((res, res_parts),
                                 (res_mut, res_mut_parts),
                                 (res_notfound, notfound_parts),
                                 (list_match, match_parts),
                                 (list_mutated, mutated_parts)):
                if part.shape[0] > 0:
                    bucket.append(part)

        # The empty template goes first so the declared column order is kept
        # and an empty result still has the expected columns.
        dft_b = pd.concat([pd.DataFrame(columns=ref_guide_cols), *ref_parts])
        dft_res = pd.concat([pd.DataFrame(columns=pair_cols), *res_parts])
        dft_res_mut = pd.concat([pd.DataFrame(columns=pair_cols), *res_mut_parts])
        dft_notfound = pd.concat([pd.DataFrame(columns=notfound_cols), *notfound_parts])
        matched_by_ref[ref_name] = pd.concat([pd.DataFrame(columns=guide_cols), *match_parts])
        mutated_by_ref[ref_name] = pd.concat([pd.DataFrame(columns=guide_cols), *mutated_parts])

        if ref_name == 'GRCh38':
            # The selected reference guides are listed once, before any results.
            st.write('Selected Reference Guides for **Set B**')
            tbl_disp(dft_b, 'All', 'ReferenceGuides', 0)

        # NOTE(review): the mutated table is shown only when there are no
        # matched rows at all; confirm this is intended for multi-gene selections.
        if dft_res.shape[0] > 0:
            st.write(f'Matched to **{ref_name}** Reference Guides for **Set B**')
            tbl_disp(dft_res, 'select_genes', f'SetB_{ref_name}')
        elif dft_res_mut.shape[0] > 0:
            st.write(f'Mutated to **{ref_name}** Reference Guides for **Set B**')
            st.markdown(caution1, unsafe_allow_html=True)
            tbl_disp(dft_res_mut, 'select_genes', f'SetB_Mutated_{ref_name}')
        if dft_notfound.shape[0] > 0:
            st.write(f'**SetB Guides Not Found in {ref_name}**')
            st.table(dft_notfound)

    # Merge the GRCh38 and CHM13 (LR) guide-level hits side by side.
    merged_cols = ['gene', 'ref_guide', 'chr', 'position_GRCh38', 'position_LR',
                   'strand_GRCh38', 'strand_LR', 'mutated_guide_GRCh38', 'mutated_guide_LR',
                   'num_mismatch_GRCh38', 'num_mismatch_LR']
    merged_mutated_set = pd.merge(mutated_by_ref['GRCh38'], mutated_by_ref['CHM13'],
                                  how="outer", on=["gene", "ref_guide", "chr"],
                                  suffixes=("_GRCh38", '_LR'))[merged_cols]
    merged_match_set = pd.merge(matched_by_ref['GRCh38'], matched_by_ref['CHM13'],
                                how="outer", on=["gene", "ref_guide", "chr"],
                                suffixes=("_GRCh38", '_LR'))[merged_cols]
    if merged_match_set.shape[0] > 0:
        st.write('**Matched** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_match_set, 'select_genes', 'SetB_Matched_GRCh38_CHM13', 0)
    elif merged_mutated_set.shape[0] > 0:
        st.write('**Mutated** Guides for **Set B** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_mutated_set, 'select_genes', 'SetB_Mutated_GRCh38_CHM13', 0)
elif ListBRes:
    # Checkbox ticked but there is no selection to work with.
    st.write("**Please select genes from the above table to begin**")
786
+
787
# Results for Set C: for every gene selected above, look up its Set C guide
# pair against GRCh38 and CHM13, show the per-reference tables, then show the
# GRCh38/CHM13 side-by-side merge of the individual guide hits.
ListCRes = st.checkbox('Results For SetC', key=50)
if ListCRes and get_table is not None:
    variant_set = get_table[['gene']]

    ref_guide_cols = ['gene', 'guide_type', 'protospacer_A', 'protospacer_B']
    pair_cols = ['sgID_1', 'sgRNA_1', 'chr_sgRNA_1', 'position_sgRNA_1',
                 'sgID_2', 'sgRNA_2', 'chr_sgRNA_2', 'position_sgRNA_2', 'sgID_1_2']
    notfound_cols = ['gene', 'ref_guide']
    guide_cols = ['gene', 'ref_guide', 'chr', 'position', 'mutated_guide', 'strand', 'num_mismatch']

    # Per-reference guide-level hits, kept for the final merge.
    matched_by_ref = {}
    mutated_by_ref = {}

    for ref_name, found_tbl, notfound_tbl in (
            ('GRCh38', listC_found_ref, listC_notfound_ref),
            ('CHM13', listC_found_lr, listC_notfound_lr)):
        # Pieces are collected in lists and concatenated once per reference:
        # DataFrame.append no longer exists in pandas >= 2.0 and repeated
        # appends copy the accumulated frame on every iteration.
        ref_parts = []
        res_parts, res_mut_parts, notfound_parts = [], [], []
        match_parts, mutated_parts = [], []

        for gene in variant_set['gene']:
            ref_listC = listC[listC['gene'] == gene][['sgID_AB', 'guide_type', 'protospacer_A', 'protospacer_B']]
            # The sgID_AB value is passed on under the 'gene' header.
            ref_listC.columns = ref_guide_cols
            res, res_mut, res_notfound, list_match, list_mutated = get_lists(ref_listC, found_tbl, notfound_tbl)

            ref_parts.append(ref_listC)
            for part, bucket in ((res, res_parts),
                                 (res_mut, res_mut_parts),
                                 (res_notfound, notfound_parts),
                                 (list_match, match_parts),
                                 (list_mutated, mutated_parts)):
                if part.shape[0] > 0:
                    bucket.append(part)

        # The empty template goes first so the declared column order is kept
        # and an empty result still has the expected columns.
        dft_c = pd.concat([pd.DataFrame(columns=ref_guide_cols), *ref_parts])
        dft_res = pd.concat([pd.DataFrame(columns=pair_cols), *res_parts])
        dft_res_mut = pd.concat([pd.DataFrame(columns=pair_cols), *res_mut_parts])
        dft_notfound = pd.concat([pd.DataFrame(columns=notfound_cols), *notfound_parts])
        matched_by_ref[ref_name] = pd.concat([pd.DataFrame(columns=guide_cols), *match_parts])
        mutated_by_ref[ref_name] = pd.concat([pd.DataFrame(columns=guide_cols), *mutated_parts])

        if ref_name == 'GRCh38':
            # The selected reference guides are listed once, before any results.
            # (Heading previously read "Set B" here: copy-paste slip.)
            st.write('Selected Reference Guides for **Set C**')
            tbl_disp(dft_c, 'All', 'ReferenceGuides', 0)

        # NOTE(review): the mutated table is shown only when there are no
        # matched rows at all; confirm this is intended for multi-gene selections.
        if dft_res.shape[0] > 0:
            st.write(f'Matched to **{ref_name}** Reference Guides for **Set C**')
            tbl_disp(dft_res, 'select_genes', f'SetC_{ref_name}')
        elif dft_res_mut.shape[0] > 0:
            st.write(f'Mutated to **{ref_name}** Reference Guides for **Set C**')
            st.markdown(caution1, unsafe_allow_html=True)
            tbl_disp(dft_res_mut, 'select_genes', f'SetC_Mutated_{ref_name}')
        if dft_notfound.shape[0] > 0:
            st.write(f'**SetC Guides Not Found in {ref_name}**')
            st.table(dft_notfound)

    # Merge the GRCh38 and CHM13 (LR) guide-level hits side by side.
    merged_cols = ['gene', 'ref_guide', 'chr', 'position_GRCh38', 'position_LR',
                   'strand_GRCh38', 'strand_LR', 'mutated_guide_GRCh38', 'mutated_guide_LR',
                   'num_mismatch_GRCh38', 'num_mismatch_LR']
    merged_mutated_set = pd.merge(mutated_by_ref['GRCh38'], mutated_by_ref['CHM13'],
                                  how="outer", on=["gene", "ref_guide", "chr"],
                                  suffixes=("_GRCh38", '_LR'))[merged_cols]
    merged_match_set = pd.merge(matched_by_ref['GRCh38'], matched_by_ref['CHM13'],
                                how="outer", on=["gene", "ref_guide", "chr"],
                                suffixes=("_GRCh38", '_LR'))[merged_cols]
    if merged_match_set.shape[0] > 0:
        st.write('**Matched** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_match_set, 'select_genes', 'SetC_Matched_GRCh38_CHM13', 0)
    elif merged_mutated_set.shape[0] > 0:
        st.write('**Mutated** Guides for **Set C** to both **GRCh38 and CHM13 references** (*Each guide sequence has a trailing NGG* and **leading G even if it is a missmatch**)')
        tbl_disp(merged_mutated_set, 'select_genes', 'SetC_Mutated_GRCh38_CHM13', 0)
elif ListCRes:
    # Checkbox ticked but there is no selection to work with.
    st.write("**Please select genes from the above table to begin**")
883
+