File size: 92,766 Bytes
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
00527f1
 
 
 
 
 
 
 
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517ed73
15775d8
 
517ed73
15775d8
 
 
 
 
 
 
a31ca96
00527f1
15775d8
 
 
a31ca96
00527f1
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a31ca96
15775d8
 
a31ca96
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
815f6e5
 
 
 
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517ed73
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a31ca96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517ed73
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a31ca96
15775d8
a31ca96
 
 
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a31ca96
15775d8
 
 
 
a31ca96
 
 
 
 
15775d8
 
 
 
 
a31ca96
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517ed73
15775d8
 
 
 
 
 
 
a31ca96
 
 
 
 
 
 
 
 
15775d8
 
 
 
 
 
a31ca96
15775d8
 
 
 
 
 
 
 
 
 
 
 
 
 
517ed73
 
 
 
 
 
 
 
 
15775d8
 
 
a31ca96
 
 
15775d8
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
#from turtle import shape
import streamlit as st
#from st_keyup import st_keyup
import pandas as pd
import numpy as np
from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode

import sys
import subprocess

import subprocess as sp

from subprocess import PIPE, run

import time
import os
from PIL import Image

# Directory (with trailing slash) under the current working directory where
# per-query input/output files are written and read.
cwd=os.getcwd()+'/'+'data/'

#get snps list
# SNPS.csv is read relative to the working directory; only its 'GENE:SNP'
# column is used here.
snps = pd.read_csv("SNPS.csv")
variants=snps['GENE:SNP'].unique()
# Same unique values ordered by string length (shortest first).
variants_s=sorted(variants,key=len)


# Use the full browser width for the page layout.
st.set_page_config(layout="wide") 
# Inject a CSS rule for the `.streamlit-expanderHeader` class (larger font).
st.markdown(
    """
<style>
.streamlit-expanderHeader {
    font-size: x-large;
}
</style>
""",
    unsafe_allow_html=True,
)

# Raw strings holding LaTeX formula text. Where they are rendered is not
# visible in this part of the file.
# NOTE(review): every string below opens with `$$` but has no closing `$$` --
# confirm this renders as intended wherever the strings are used.
latext1 = r'''
$$ 
f(s_j) = \frac{1}{1+exp(-g(s_j))} \\
g(s_j) = int + \sum_{i} w_{ij} \\
where f(s_j) \epsilon \ [0,1] \\
'''            
latext2 = r'''
$$ 
Specificity (g) = \frac{1}{1+\sum_{o\epsilon OffTargets(g)} CFD(o,g)} \\
'''            

gene_rank = r'''
$$ 
Score(g) = min[Specificity(g), Specificity(g'), 1.25*CuttingEfficiency(g)] \\
'''            

sgran_target_score = r'''
$$ 
score = \sum_{off\_targets}[\frac{log_{10}(dist) + score_{off\_target}}{total\_off\_targets}] - total\_off\_targets   - (2) \\
'''            

off_target = r'''
$$ 
score_{Off-target} = \sum_{mismatch} 1.2^{pos}    -  (1)\\
'''            

#for inline python code generation
class CodeGenerator:
    """Minimal accumulator for generated source text with an indent level.

    ``gen + value`` (and therefore ``gen += value``) produces a *new*
    generator whose text is the old text, followed by the indentation unit
    repeated ``level`` times, followed by ``str(value)``. The operand on the
    left is never modified.
    """

    def __init__(self, indentation='\t'):
        # Text accumulated so far, the current nesting depth, and the unit
        # of indentation emitted once per depth level.
        self.code = ''
        self.level = 0
        self.indentation = indentation

    def indent(self):
        """Go one nesting level deeper."""
        self.level = self.level + 1

    def dedent(self):
        """Go one nesting level up; a no-op when already at depth zero."""
        if self.level <= 0:
            return
        self.level = self.level - 1

    def __add__(self, value):
        """Return a new generator with ``value`` appended at the current depth."""
        prefix = ''.join(self.indentation for _ in range(0, self.level))
        combined = CodeGenerator(indentation=self.indentation)
        combined.level = self.level
        combined.code = ''.join((str(self), prefix, str(value)))
        return combined

    def __str__(self):
        """Return the accumulated text."""
        return str(self.code)

  
def _parse_optional_fraction(raw):
    """Parse a text field that must hold ``None`` or a number in [0, 1].

    Returns a ``(ok, value)`` pair. ``ok`` is False when the text is neither
    empty/``None`` (case-insensitive) nor a float within [0, 1]; ``value`` is
    ``None`` or the parsed float.
    """
    text = str(raw).strip()
    if text == '' or text.lower() == 'none':
        return True, None
    try:
        number = float(text)
    except ValueError:
        return False, None
    # NaN and infinities fail this range check as well.
    if not 0.0 <= number <= 1.0:
        return False, None
    return True, number


def ForeCastBE(target_seq,method,select_method,key):
    """Render the optional FORECasT-BE editing-efficiency calculator.

    Shows a No/Yes radio. When "Yes" is chosen, the target sequences are
    written to a FASTA file under ``cwd``, the user picks an editor and
    optional mean/std values, and pressing "Calculate" runs
    ``forecast_be.predict_batch_fasta`` in a child Python process and displays
    the resulting CSV through ``display_ForeCast_BE_res``.

    Parameters:
        target_seq: iterable of target sequence strings; each is written as
            both the FASTA header and the record body.
        method: label used in the output file name and passed on to the
            result display helper.
        select_method: tool label used in the output file name.
        key: Streamlit widget key for the No/Yes radio.

    Relies on the module-level names ``variant_spl``, ``cwd`` and
    ``display_ForeCast_BE_res`` defined elsewhere in this file.

    Differences from the previous implementation:
        * mean/std are validated and passed as parsed values instead of being
          spliced verbatim into generated code (code-injection risk);
        * the child process is started with an argument list (no shell) and
          ``sys.executable -c`` instead of a shared ``pytest1.py`` file that
          concurrent sessions could overwrite;
        * a failed run is reported instead of crashing in ``pd.read_csv`` or
          silently showing a stale CSV from an earlier run.
    """
    Editing_Efficiency = st.radio(
        "Base Editing_Efficiency Calculator (FORECasT-BE)",
        ('No', 'Yes'), horizontal=True, key=key)
    if Editing_Efficiency != 'Yes':
        return

    Editor = st.radio(
        "Editor",
        ('ABE', 'CBE'), horizontal=True)

    # Use the second component of the selected variant when there is one,
    # otherwise the first.
    snp_label = variant_spl[1] if len(variant_spl) > 1 else variant_spl[0]
    fname = cwd + snp_label + "_" + method + "_" + select_method + '.fasta'
    res_fname = fname + "_" + Editor + '.csv'

    # The context manager guarantees the FASTA is flushed and closed before
    # the child process reads it.
    with open(fname, "w") as fasta:
        for seq in target_seq:
            fasta.write('>' + seq + '\n')
            fasta.write(seq + '\n')

    with st.form(key='columns_in_form'):
        c2, c3 = st.columns(2)
        with c2:
            mean1 = st.text_input("Mean Editing Efficiency (None or 0-1)", value='None', disabled=False)
        with c3:
            std1 = st.text_input("Standard deviation of editing efficiency (None or 0-1)", value='None', disabled=False)
        st.form_submit_button(label = 'Update')

    if not st.button('Calculate'):
        return

    mean_ok, mean_val = _parse_optional_fraction(mean1)
    std_ok, std_val = _parse_optional_fraction(std1)
    if not (mean_ok and std_ok):
        st.error("Mean and standard deviation must each be 'None' or a number between 0 and 1.")
        return

    # repr() yields correctly quoted/escaped Python literals for the paths and
    # 'None' or a float literal for the validated numbers.
    script = (
        "import forecast_be\n"
        "forecast_be.predict_batch_fasta({fasta!r},editor={editor!r},"
        "output_path={out!r},mean={mean!r},std={std!r})\n"
    ).format(fasta=fname, editor=Editor, out=res_fname, mean=mean_val, std=std_val)

    result = run([sys.executable, "-c", script],
                 stdout=PIPE, stderr=PIPE, universal_newlines=True)

    if result.returncode != 0 or not os.path.exists(res_fname):
        st.error('FORECasT-BE did not complete successfully.')
        if result.stderr:
            st.text(result.stderr)
        return

    Forecast_BE_Res = pd.read_csv(res_fname)
    # Drop columns that are entirely empty before deciding whether there is
    # anything to show.
    Forecast_BE_Res.dropna(axis=1, how='all', inplace=True)
    if Forecast_BE_Res.shape[1] > 1:
        display_ForeCast_BE_res(Forecast_BE_Res, method, variant_spl)
    else:
        st.markdown('**Editing Efficiency Calculator did not return any results**')
    # NOTE: the FASTA and CSV files are left in place under data/ (the cleanup
    # code was already commented out in the previous version).


def transform(df,str):
    """Let the user choose which columns of ``df`` to keep.

    Renders a Streamlit multiselect labelled with ``str`` that offers every
    column of ``df`` and has all of them preselected, then returns ``df``
    restricted to the chosen columns (in the order returned by the widget).
    """
    available = df.columns.tolist()
    preselected = df.columns.tolist()
    chosen = st.multiselect(str, available, preselected)
    return df[chosen]
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_data -- confirm the pinned Streamlit version before switching.
@st.cache
def convert_df1(df):
    """Return ``df`` as UTF-8 encoded CSV bytes without the index column."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')

def convert_df(df):
    """Return ``df`` as UTF-8 encoded CSV bytes, index column included."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')

#BE-DICT
def BEDICT(method,select_method):
    """Render the BE-DICT page: static help text followed by the results.

    Writes a header, five expanders ("Summary", "How it works", "References",
    "Tool Options", "Scoring") and several notes, then calls ``display_res``
    with a comma separator and the ``'rs_id'`` column name. When that call
    returns a non-empty result whose first element is not ``0``, the
    FORECasT-BE calculator (``ForeCastBE``) is rendered for it.

    Parameters:
        method: label appended to the page header and forwarded to
            ``display_res`` / ``ForeCastBE``.
        select_method: tool label forwarded to ``display_res`` / ``ForeCastBE``.

    Relies on the module-level names ``caution``, ``display_res`` and
    ``ForeCastBE`` defined elsewhere in this file.
    """
    #BE-DICT
    #if select_method == "BE-DICT":
    #tab1, tab2 = st.tabs(["Results", "Efficiency Calculators"])

    #with tab1:
    st.header("BE-DICT: "+method)
    expander = st.expander("Summary")
    #expander.markdown("**Summary**")
    expander.markdown("BE-DICT predicts base editing outcomes for **4 commonly used base editors (BEs)** by employing an **attention-based deep learning [algorithm](https://www.nature.com/articles/s41467-021-25375-z)** trained on high-throughput target library (of 28,394 target sequences) screens. It **predicts the editing of a target and surrounding bystander nucleotides** for the following ABEs and CBEs.")
    expander.write(
    """
    - Adenine Base Editors (ABEs)
        - Based on Adenine deaminase ecTad7.10 **(ABEmax)** and ecTadA-8e **(ABE8e)**
    - Cytosine Base Editors (CBEs)
        - Based on Cytosine deaminase rAPOPEC1 **(CBE4max)** and **Target-AID**
    """
    )
    expander.markdown("Most base editors convert target bases in a ~5- nucleotide region within the protospacer target sequence and undesired **bystander** editing of additional C or A bases in the editing window are common. All results shown here are based on bystander models for each ABE and CBE.")
    
    expander1 = st.expander("How it works")
    expander1.markdown(
        """
        BE-DICT utilizes comprehensive base editing data (to train and test) generated in the ABE and CBE target library screens (8,558 for ABEmax; 9,534 for CBE4max; 3,416 for ABE8e; 10,177 for Target-AID) for designing and training a machine learning model capable of predicting base editing outcomes at any given target site. The model has three main blocks: 
        -   **Embedding block** that embeds both the nucleotide’s and its correponding position from one-hot encoded representation to a dense vector representation.
        -   **Encoder block** that contains:
            -   A self-attention layer (with multi-head support).
            -   Layer normalization & residual connections. 
            -   Feed-forward network.
        -   **Output block** that contains:
            -   A position attention layer 
            -   A classifier layer.
        """
        )
    expander1.markdown('BE-DICT predicts base editing efficiencies with high accuracy from previously published experiments as well as 18/16 separate endogenous genomic loci for ABEmax/CBE4max and ABE8e/Target-AID.')
    expander1.markdown('Currently, BE-DICT is trained on datasets for ABEmax, CBE4- max, ABE8e, and Target-AID.' )
    expander2 = st.expander("References")

    expander2.write("[BE-DICT Web App](http://130.60.24.130/page-set?actionID=5f8c494b8c854d0029ffa9d3)")
    expander2.write("[BE-DICT Paper](https://www.nature.com/articles/s41467-021-25375-z)")
    
    #expander3 = st.expander("Tool options")
    
    
    
    expander3 = st.expander("Tool Options: All you can do with this tool")   
    
    #expander3.markdown('Options')
    #expander3.markdown(tips,unsafe_allow_html=True)
    expander3.markdown('This tool uses an **attention based deep learning framework** to predict base editing outcomes for **4 popular** base editors: ABEMax, ABE8e, CBE4max and Target-AID.')
    
    
    expander3.markdown(
        """
        Input: 
        -   2-column csv: 
            -   col1=Inp_seq
            -   col2=seq_id
        where **Inp_seq is a 20 bp** target sequence and seq_id is an identification.
        """
        )
    expander3.markdown(
        """
        Output: 
        -   5 column csv file
            -   Columns of interest: 
                -   Inp_seq: 
                -   Outp_seq:
                -   Pred_score: Higher is better
    """
    )
    #expander3.markdown('**Details about Score column**')
    #expander3.markdown('BE-DICT models (using multi-head self-attention inspired by the Transformer encoder architecture) and interprets dependencies of base editing on the protospacer target sequence')    
    # expander3.write(
    # """
    # - It takes a sequence of nucleotides of the protospacer as input and computes the probability of editing for each target nucleotide as output
    # - assigns a weight (attention-score) to each base within the protospacer
    # - The output is a probability score, reflecting the likelihood (between 0 and 1) with which a target base will be edited (C-to-T or A-to-G). 
    # """
    # )
    
    
    expander3.markdown("**Batch mode** can be run from [here](http://130.60.24.130/page?actionID=607552549609a200293b663f)")

    expander4 = st.expander("Scoring")
    expander4.markdown(
        """
        Editing efficiency of a base editor depends on: 
        - Consensus sequence preference of the deaminase.
        - Binding efficiency of the sgRNA to the protospacer.
        - Undesired ‘bystander’ editing of additional C or A bases in the editing window, among other factors.
        """
    )
    expander4.markdown("\n")
    expander4.markdown(
        """
        BE-DICT uses an **attention-based machine learning [algorithm](https://www.nature.com/articles/s41467-021-25375-z) (an encoder–decoder architecture)** to predict base editing outcomes of commonly used **ABEs** and **CBEs**. 
        - It takes protospacer sequence as input
            - Assigns a weight (attention-score) to each base within the protospacer
            - Computes the probability of editing for each target nucleotide as output            
            - The output is a probability score, reflecting the likelihood (between 0 and 1) with which a target base will be edited (C-to-T or A-to-G). 
        """
        )
    
    #expander4.markdown('It assigns a weight (attention-score) to each base within the protospacer with regard to its influence on the editing outcome. The **output** is a probability score (between 0 and 1) with which a target base will be edited (C-to-T or A-to-G).')
    expander4.markdown('BE- DICT implements **per-base (probability score whether a single base will be edited)** as well as **bystander (probabilities for all combinations of sequences with target-based and bystander transitions, as well as the probability of observing a wild-type sequence)** model.') 
    #expander4.markdown('Bystander model: Multiple A or C nucleotides within the editing window can lead to (undesired) bystander base conversions. It takes a sequence of nucleotides of the protospacer as input, and com- putes the probability of the different output sequences (i.e. probabilities for all combinations of sequences with target-based and bystander transitions, as well as the probability of observing a wild-type sequence)')
    st.markdown('**Please note that this tool only targets NGG PAM**')
    st.markdown("**Please note:** *Only one of all possible alleles is used to generate this output.*")
    st.markdown("**Also note that:** *All results shown here are based on bystander models for each ABE and CBE.*")
    # `caution` is a module-level HTML snippet defined outside this function.
    st.markdown(caution,unsafe_allow_html=True)
    # presumably display_res returns the displayed target sequences, with a
    # leading 0 as a "nothing to show" marker -- verify against display_res.
    target_seq=display_res(method,',','rs_id',select_method)
    if len(target_seq)>0 and target_seq[0]!=0:
        # 11111 is the Streamlit widget key handed to the calculator's radio.
        ForeCastBE(target_seq ,method,select_method,11111)

#ChopChop
def Chopchop(method,select_method):
    """Render the CHOPCHOP page: static help text followed by the results.

    Writes a header, five expanders ("Summary", "How it works", "References",
    "Tool Options", "Scoring") and several notes, then calls ``display_res``
    with a tab separator and the ``'snp_id'`` column name. The value returned
    by ``display_res`` is not used here.

    Parameters:
        method: label appended to the page header and forwarded to
            ``display_res``.
        select_method: tool label forwarded to ``display_res``.

    Relies on the module-level names ``caution`` and ``display_res`` defined
    elsewhere in this file.
    """
    st.header("Chopchop: "+method)
    expander = st.expander("Summary")
    
    expander.markdown(
        """
        ChopChop is a versatile tool that identifies  single guide RNA (sgRNA) targets for CRISPR–Cas9 system for:
        -   **DNA** 
        -   **RNA** 
        -   **Targeted enrichment of loci for long-read sequencing** 
        for over **200 genomes** and **3 transcriptomes**. \n\n
        It offers a wide range of selection of **CRISPR effectors** (Cas9, Cas9 Nickase, CasX, and Cas13), **Species**, and **Purpose** (knockout, knockdown, activation, repression, enrichment) alongside a variety of **Options** including selection of specific region, PAM sequences, various efficiency measures, primers, prediction of Cas9 repair outcomes etc.
        """
        )
    

    expander1 = st.expander("How it works")
    expander1.markdown("""
    CHOPCHOP accepts **input** in one of the following forms: 
    -   Gene name
    -   Genomic coordinates
        -   **In batch mode, we used** a text file containing chr:start-end per line for each snp. Ex: chr1:152220450-152220451".
    -   DNA sequence 
    Based on the input provided, chopchop retrieves sequence (corresponding to gene name/coordinates) and scan it for all potential target (and off-target) sites (based on search requirement selected).  
    **Each sgRNA** is then ranked according to: 
    -   Number of off-targets in the genome
    -   Number of mismatches lie within the off-targets. 
    -   Ranked by: 
        -   GC-content
        -   Presence of a guanine (G) at position 20 in the sgRNA target site
        -   Any target sites with the same score are then sorted by their position in the gene (with preference to 5′ positions).
    **Output:** A tab separated text file  
    - **Columns of interest**: 
        -   Target sequence
        -   Efficiency (**higher the better**)
    Please note that not all options have Efficiency defined [Ref](https://chopchop.cbu.uib.no/instructions)
        """
    )

    expander1.markdown("**Instructions to run Batch mode** can be found [here](https://bitbucket.org/valenlab/chopchop/src/master/)")
    #expander1.markdown(tips,unsafe_allow_html=True)
    
    expander2 = st.expander("References")    
    
    expander2.write("[ChopChop Web App](https://chopchop.cbu.uib.no)")
    expander2.write("[ChopChop Paper](https://academic.oup.com/nar/article/47/W1/W171/5491735)")
    

    expander3 = st.expander("Tool Options: All you can do with this tool")   
    
    expander3.write(
    """
    This tool offers sgRNA design for:
    - **DNA using:** 
        - CRISPR/Cas9 system for knockout, knockin, activation, repression and nanopore enrichment.
        - CRISPR/Cpf1 or CasX system for knockout, activation, repression and nanopore enrichment.
        - CRISPR/Cas9 nickase system for knockout and knockin.
        - TALEN system for knockout.
    - **RNA using:**
        - CRISPR/Cas13 (C2C2) for knockdown.
    **This tool also offers a variety of PAM sequences and other filtering options.** \n\n
    **Scoring:** It offers ***2 off-target detection*** methods and ***7 efficiency scores*** to aid users in selecting optimal sgRNAs for their research.
    """
    )


    
    expander4 = st.expander("Scoring")
    expander4.markdown('All CRISPR–Cas applications use an sgRNA to direct the CRISPR effector (Cas9 and its variants) protein to its target. In theory, CRISPR–Cas targeting only requires complementarity between the sgRNA and its DNA target. However, **efficient targeting** follows more complex factors including **1)** position of specific nucleotides in the target sequence, **2)** the accessibility of the target site, and **3)** the sequence of its flanking regions as well as specific design.')
    expander4.markdown(
        """
        **Efficiency score:** *Efficiency* column displays normalized score between 0-1. Based on **Cas9 effector (Cas9/Cas9 nickase/CasX/Cpf1)** used, user can select an appropriate scoring algorithm from one of the following options.
        -   **G20**: Prioritizes a guanine at position 20, just upstream of PAM.
        -   Doench et al. 2014 - only for NGG PAM
        -   Doench et al. 2016 - only for NGG PAM (default)
        -   Chari et al. 2015 - only NGG and NNAGAAW PAM's in hg19 and mm10
        -   Xu et al. 2015 - only for NGG PAM (but can be used for other PAMs)
        -   Moreno-Mateos et al. 2015 - only for NGG PAM.
        **Ranking:** Target sites are ranked according to: 
        -   Efficiency score
        -   Number of off-targets and whether they have mismatches
        -   Existence of self-complementarity regions longer than 3 nt. The number indicates how many regions of self-complementarity are predicted.
        -   GC-content
        -   Location of sgRNA within a gene (5’ (best) -> 3’ (worst))
        
        """
    )
    
    expander4.markdown(
        """
        \n\n
        Here we describe these scoring algorithms.\n\n
        **Sequence model for predicting sgRNA efficiency** in CRISPR/Cas9 knockout (and CRISPRi/a) experiments [Xu et](https://genome.cshlp.org/content/25/8/1147): 
        
        DNA sequence features that contribute to single guide RNA (sgRNA) efficiency in CRISPR-based screens
        - Predictive (sequence) model:
            -   Select a list of essential genes whose deletion resulted in a growth disadvantage in genome-wide knockout experiments from published data sets.
            -   Efficient sgRNAs:  all sgRNAs targeting these essential genes (two fold depletion).
            -   Inefficient sgRNAs:  all sgRNAs targeting these essential genes (very low depletion w.r.to positive control)
            -   Computedthe log odds ratio of nucleotide (40 bp sequences (aligned at the PAM) including the 19-bp or 20-bp spacer targets as well as their 3′ and 5′ flanking DNAs.) frequency between DNA sequences targeted by efficient and inefficient sgRNAs.
            - Feature selection (sgRNA nucleotides): (1) Signs of the odds ratios are concordant; and (2) magnitudes of the odds ratios are above a threshold in all three sgRNA sets, where the threshold was computed from a statistical significance analysis
            - Elastic-Net model was used to identify dominating nucleotide features: 28 sequence features were identified.
        """
    )
    expander4.markdown('Cas9 knockout mode: reports prediction of DSB repair outcomes. The model estimates the probability that a given sgRNA will result in a frameshift mutation.')
    expander4.markdown(
        """
        **CRISPR/Cas13 mRNA Knockdown:**
        - Input: Default is 28 bp (28 bp sgRNA without PAM and 1bp protospacer flanking site (PFS)
        - Output: MM0, MM1, MM2 and MM3 columns specify the number of off-target transcripts (with 0, 1, 2 or 3 mismatches, respectively) that your gRNA may bind to, outside of your target gene.
        **CRISPR/Cas9/Cas9 nickase Knock-in:** An sgRNA activity [model](https://www.nature.com/articles/nmeth.3543): 
        - Sequence: 35 bp (6nt upstream flanking + 20nt sgRNA + 3nt PAM + 6nt downstream flanking)
        -   Total Features: 684
            -   Mononucleotides Features: 140
                -   4 bases (A,C,G,T) × 35 nt (6nt upstream, 20nt sgRNA, 3nt PAM, 6nt downstream)
                -   Dinucleotide Features: 16 dinucleotides × 34 positions (of 35 bp sequence)
                -   Using randomized logistic regression on 684 features with regularization and selected 91 features.
        **Chari et. al [model](support vector machine model):** A support vector machine model
        """
    )
    expander4.markdown(
        """
        **A deep-learning-based (Convolutional Neural Network - CNN) based DeepCpf1 model for Cas12a/Cpf1 RNA [Kim et. al](https://www.nature.com/articles/nmeth.4104):** An automated feature engineering and prediction algorithm that automatically learn informative representations of target sequences relevant to Cpf1 activity profiles.
        -   Input: 34-bp target sequence
        -   Uses one-hot encoding input layer, convolution layer with 80 filter and rectified linear unit (ReLU) to the convolution outputs and three fully connected layers of 80, 40, and 40 units, chromatin accessibility integration layer and regression output layer.
        """
        )
    expander4.markdown(
        """
        **A predictive model (logistic regression classifier) for sgRNA activity (CRISPR/Cas9 knockout) [Doench](https://www.nature.com/articles/nbt.3026):** 
        Efficiency score of a CRISPR/Cas9 system depends on:
        -   Nucleotide composition of the DNA downstream from the spacer target
        -   Sequence features: 4 nt upstream of the sgRNA target site, the 20 nt of sgRNA complementarity, the PAM and the 3 nt downstream of the sgRNA target sequence.
        """
    )
    expander4.markdown('Self-complementarity column: Data suggests that self-complementarity within the gRNA or between the gRNA and RNA backbone can inhibit gRNA efficiency (Thyme et al., 2016 ). This option searches for complementarity within the gRNA, and between the gRNA and either a standard backbone (AGGCTAGTCCGT), an extended backbone (AGGCTAGTCCGT, ATGCTGGAA) or a custom backbone. Some users will choose to replace the leading nucleotides of their gRNA with “GG” for T7 transcription. Check this box to search for complementarity with the GG replacement.')
    expander4.markdown(
        """
        **Method for determining off-targets in the genome ((selectable from Cas9/CasX/Cpf1 tabs in Options)):** 
        -   **Cong et al., 2013:** single-base mismatches up to 11 bp 5' of the PAM completely abolish cleavage by Cas9. However, mutations further upstream of the PAM retain cleavage activity. We have created a uniqueness method that searches for mismatches only in the first 9 bp, since a mismatch further towards the PAM motif is predicted to cause no cleavage. 
        -   **Hsu et al., 2013:** mismatches can be tolerated at any position except in the PAM motif. 
        -   Default Method: Searches for mismatches only in the 20 bp upstream of the PAM.
        """
    )

    st.markdown("**Please note that the tool was run for CRISPR/cas9 for NGG (knock-out), CRISPR/cas9-nickase (knock-out) for NGG and NRG (R=A or G), CRISPR/cpf1 for TTN, CRISPR/CasX for TTCN PAM and CRISPR/cas13(c2c2).**")
    st.markdown("**Please note that not all options results in an efficienc score (0 is reported in efficiency column).**")
    # `caution` is a module-level HTML snippet defined outside this function.
    st.markdown(caution,unsafe_allow_html=True)
    # Results are read as tab-separated text keyed on the 'snp_id' column; the
    # return value is intentionally not captured here.
    display_res(method,'\t','snp_id',select_method)            

#
def Guidescan2(method,select_method):
    """Render the GuideScan2 help page followed by the stored results table.

    The page consists of a header, five expanders (Summary, How it works,
    References, Tool Options, Scoring), the run notes and finally the table
    drawn by ``display_res``.

    Relies on the module-level strings ``gene_rank``, ``latext2``, ``tips`` and
    ``caution`` being defined before the call.

    Parameters
    ----------
    method : str
        Run option; appended to the page header and forwarded to ``display_res``.
    select_method : str
        Forwarded unchanged to ``display_res``.
    """
    st.header("Guidescan2: "+method)

    expander = st.expander("Summary")
    expander.markdown("GuideScan2 employs Cas9 (tracrRNA and crRNA) and Cas12a (also known as cpf1, requires only crRNA) for sgRNA **(single- and paired-gRNA)** design (coding and noncoding genomic regions) for 8 organisms. It enables construction of high-specificity gRNA databases with reduced off-target effects.")
    expander.markdown("CRISPR-Cas9 targets a 20-nucleotide spacer sequence at the end of the gRNA that is complementary to a DNA protospacer sequence followed immediately at the 3’ end by a PAM of the form NGG (more efficient targeting) or NAG (less efficient); here N stands for a ‘wildcard’, i.e. can match any nucleotide. Other natural and engineered CRISPR-Cas systems can **vary in PAM sequence, PAM position with respect to the protospacer sequence, and requirements on the level of similarity between gRNA and the target.**")
    expander.markdown("Given a genomic region, the task of gRNA design is to find gRNAs that can target anywhere in that region. Many potential gRNAs can target at multiple locations in the genome with varying efficiency. Typically a gRNA is designed to target a particular location with **perfect complementarity** with all other targets of this gRNA being **off-targets**. **Goal** of gRNA design is typically to **maximize gRNA efficiency at the primary target site while minimizing off-targeting.**")
    expander.markdown("Variants and extensions of the gRNA design task include: paired gRNA design to select two gRNAs targeting flanking sites of a genomic region of interest; saturation experiment design to exhaustively select all gRNAs expected to target a selected region of interest; and library design to select a small number of the most effective gRNAs for each of hundreds or thousands of regions of interest.")

    expander1 = st.expander("How it works")
    # A blank line precedes every bold heading that follows a list; without it
    # Markdown folds the heading into the last bullet as continuation text.
    expander1.write(
    """
    **Algorithm:**
    -   A single gRNA is evaluated against a genome B (Burrows-Wheeler Transform compressed genome and index).
        -   All occurrences of the sgRNA (A spacer sequence g and PAM set P, for all g′ in a Hamming distance (depth-first search using rank-queries on the forward and reverse complement strands of the BWT of the genome) ball of radius k centered at g) in B are identified. 
        -   These occurrences are validated against the PAM set P, **pruning** any occurrences that are not followed by a PAM in this set. 
        -   This set of validated occurrences forms the **set of targets for this gRNA**. 
            -   gRNAs that have multiple perfect occurrences (indistinguishable intended target) are **filtered out**.
        -   **gRNA with a single perfect occurrence**, considered to be its primary target, is then included in the database.
        -   All other **targets that contain mismatches are considered off-targets.**
    
    -   GuideScan allows: target sequences, PAM, PAM position relative to the gRNA binding sequence, and gRNA length. 

    **Potential off-targets:**
    -   Uses a retrieval tree (trie, preprocess the targetable space in the genome, i.e. **all 20-mers followed by primary and secondary PAMs**) data structure to efficiently and precisely enumerate all targetable sequences (with a specific number of mismatches) present in a given genome. 
    """
    )

    expander2 = st.expander("References")
    expander2.write("[GuideScan2 Web App](https://guidescan.com)")
    expander2.write("[GuideScan2 Paper](https://www.biorxiv.org/content/10.1101/2022.05.02.490368v1)")

    expander3 = st.expander("Tool Options: All you can do with this tool")
    expander3.write(
    """
    This tool offers sgRNA design for:
    - **CRISPR/Cas9**
    - **CRISPR/Cpf1**
    - **Please note that this tool works best for genomic intervals >30bp.**

    **Input:**  
    -   Line delimited Genomic intervals (or DNA sequence) as a text file in the webapp **[here](https://guidescan.com/)** in the following format (of genomic range 30bp, 40bp etc):
        - Line1: chr10:11676698-11676728
        - Line2: chr1:152220435-152220465
        - and so on

    **Output:**
    -   A csv file containing all gRNAs within the genomic regions provided in the input file

    **Columns of interest (at most 6 gRNAs are reported from all possible)**:
    -   gRNA-Seq and Target-Seq
    -   PAM
    -   Number of off-targets
    -   Cutting efficiency (**Higher the better**)
    -   Specificity (**Higher the better**) [Ref](https://www.biorxiv.org/content/10.1101/2022.05.02.490368v1.full.pdf)
    -   Rank:
        -   Uses a score that balances maximizing the gRNA specificity and cutting efficiency.
        
    """
    )
    expander3.markdown(gene_rank)

    expander4 = st.expander("Scoring")
    expander4.markdown(
        """
        **Efficiency (Please see Scoring and Quality Matrices of the README section of this app):** Rule Set 2 [DOENCH 2016](https://www.nature.com/articles/nbt.3437)
        -   sgRNAs were filtered out with cutting efficiency less than 0.25 or specificity less than 0.20
        -   Selected six gRNAs for each gene. For genes with more than six sgRNAs, Ranked Genes and (selected six genes)
            -   Ranked gRNAs for each gene using a simple score that balances maximizing the gRNA specificity and cutting efficiency.
            -   Nucleotide at the 5’ end of gRNA (called g) is replaced with a G (called g') for better efficiency
            -   Ranking gRNAs for each gene is defined as

        **sgRNA Specificity and Rank score:** [DOENCH 2016](https://www.nature.com/articles/nbt.3437)
        """
    )
    # The formulas themselves live in module-level strings.
    expander4.markdown(latext2)
    expander4.markdown('sgRNA Rank Score')
    expander4.markdown(gene_rank)

    st.markdown("**Please note that the software was run with Cas9 (NGG PAM) and cpf1 (TTG PAM) option with all other options left as default.**")
    st.markdown(tips,unsafe_allow_html=True)
    st.markdown(caution,unsafe_allow_html=True)

    display_res(method,',','query',select_method)

def PE_DESIGNER(method,select_method):
    """Render the PE-Designer help page and, for the NGG run, its results.

    Draws the header, the Summary / How it works / References / Tool Options /
    Scoring expanders and the run notes. When ``method`` is ``'NGG'`` the
    results table is shown via ``display_res`` together with an optional
    off-target table behind a checkbox; for any other value a
    "List coming soon" placeholder is shown instead.

    Relies on the module-level ``caution`` string.

    Parameters
    ----------
    method : str
        PAM option of the run; appended to the header and used to select the
        results to display.
    select_method : str
        Forwarded unchanged to ``display_res``.
    """
    st.header("PE-Designer: "+method)

    expander = st.expander("Summary")
    expander.markdown(
    """
    Prime Editors **(PEs)** employ Cas9 nickase fused to an engineered reverse transcriptase template **(RTT, a wild-type Moloney Murine Leukemia Virus (M-MLV) for PE1 and mutagenised M-MLV in PE2 systems for enhanced DNA-RNA affinity, enzyme processivity, and thermostability)** via a gRNA called prime editing guide RNA **(pegRNA), can do all 12 edits.** PEs use pegRNA consisting of a 20 nt guide sequence, a primer binding site **(PBS)** and an RTT. The guide directs the Cas enzyme to a target site, the PBS hybridizes to the opposite strand to prime the reverse transcriptase, and the RTT integrates the desired genomic alteration.  
    
    **Optimized** PE2 (by employing additional sgRNA to nick the unedited strand so that cell's natural repair system copies the information in the edited strand to the complementary strand, permanently installing the edit) called PE3 and PE3b with reduced off-targets are commonly used.  
    

    """
    )
    #PE-Analyzer finds cleavage sites and defines 15-nt indicator sequences on both sides of the given input reference sequence within a user-defined comparison range (R) parameter. PE-Analyzer selects the sequences, including both indicator sequences with up to a 1 bp mismatch, from the uploaded NGS data. PE-Analyzer counts the number of identical sequences and excludes the sequences with a lower count than a minimum frequency (the default value is 1) because such sequences are considered to include sequencing errors.

    expander1 = st.expander("How it works")
    expander1.markdown(
        """
        PEs use prime editing guide RNAs (pegRNAs) which is comprised of target sequence recognizing protospacer, RTT containing the desired edit, and a primer binding site for the activation of reverse transcriptase. While traditional PE2 only requires a pegRNA, PE3 also requires a nicking guide RNA (ngRNA) for increased editing efficiency accompanying possible mutations derived from the presence of the ngRNA.  
        PE-Designer provides all possible:
        -   protospacer sequences
        -   3′ extension sequences
        -   ngRNA sequences
        -   Potential off-target sites
        """
    )

    expander2 = st.expander("References")
    expander2.write("[PE Designer Web App](http://www.rgenome.net/pe-designer/)")
    expander2.write("[PE Designer Paper](https://academic.oup.com/nar/article/49/W1/W499/6262559)")

    expander3 = st.expander("Tool Options: All you can do with this tool")
    # The blank line before "**Output:**" stops Markdown from folding the
    # heading into the last nested bullet of the input description.
    expander3.write(
    """
    This tool can be run for different PAM sequences including **NGG**, NRG etc. We only provide results for NGG.  
    **As PEs can do all 12 edits, results on all possible 3-edits for a given single base edit are presented here**    
    **Input:** We use 40 nt sequence with base of interest lying at position 20 for both reference input seq and desired edited sequence.
    -   fasta files containing NT sequence (of the target and expected edited) are provided with the following format:
        -   Target Sequence
            -   Line1: >inp_seq 1
            -   Line 2: Up to 1000 NT sequence
        -   Desired edited Sequence
            -   Line1: >inp_seq 1
            -   Line 2: Up to 1000 NT sequence

    **Output:** A csv file
    -   Columns of interest
        -   snp_ind
        -   inp_seq
        -   edited_seq
        -   Target Sequence predicted by PE-Designer
        -   Edit position
        -   Type (pegRNA or nCas9)
        -   Extension Sequence (for pegRNA)
        -   Target Sequence (for nCas9)
    """
    )

    expander4 = st.expander("Scoring")
    expander4.markdown(
        """
        This tool only reports potential off-targets along with number of mismatches.
        """
    )
    st.markdown("**Please note that the tool was run for all three edits for a given base.**")
    st.markdown(caution,unsafe_allow_html=True)

    if method=='NGG':
        # The return value of display_res is not needed on this page.
        display_res(method,'\t','snp_id',select_method)
        PE_Designer_Off=st.checkbox('Off-Targets')
        if PE_Designer_Off:
            display_res('offtargets_'+method,',','snp_id',select_method)
    else:
        st.markdown("**List coming soon**")

    
#Pnbdesigner
def Pnbdesigner(method,select_method):
    """Render the PnB Designer help page, its results and the ForeCast-BE follow-up.

    Draws the header, the Summary / How it works / References / Tool Options /
    Scoring expanders (including the ``pe1.png`` illustration, opened relative
    to the current working directory), the run notes and the results table
    from ``display_res``. When ``display_res`` returns a non-empty sequence
    whose first element is not ``0``, that value is passed on to ``ForeCastBE``.

    Relies on the module-level ``caution`` string.

    Parameters
    ----------
    method : str
        Run option; appended to the header and forwarded to ``display_res``
        and ``ForeCastBE``.
    select_method : str
        Forwarded unchanged to ``display_res`` and ``ForeCastBE``.
    """
    st.header("PnB Designer: "+method)

    expander = st.expander("Summary")
    expander.markdown(
        """
        Single base editors (BEs) employ cytidine-deaminase (Cytosine BE, CBEs: C/G -> T/A converters) or Adenine-deaminase (Adenine BE, ABEs: A/T -> G/C converters) **can only introduce 4 edits** via sgRNA. 
        
        Prime Editors **(PEs)**, employing Cas9 nickase fused to an engineered reverse transcriptase template **(RTT, a wild-type Moloney Murine Leukemia Virus (M-MLV) for PE1 and mutagenised M-MLV in PE2 systems for enhanced DNA-RNA affinity, enzyme processivity, and thermostability)** via a gRNA called prime editing guide RNA **(pegRNA)**, on the other hand **can do all 12 edits.** PEs use pegRNA consisting of a 20 nt guide sequence, a primer binding site **(PBS)** and an RTT. The guide directs the Cas enzyme to a target site, the PBS hybridizes to the opposite strand to prime the reverse transcriptase, and the RTT integrates the desired genomic alteration. **Optimized** PE2, called (by employing additional sgRNA to nick the unedited strand so that cell's natural repair system copies the information in the edited strand to the complementary strand, permanently installing the edit) PE3 and PE3b with reduced off-targets are used. 
        
        **PnB Designer** allows design of pegRNAs for PEs and guide RNAs for CBE and the most recent ABEs such as ABEmax and ABE8e. PnB Designer makes it easy to design targeting guide RNAs for single or multiple targets on a variant or reference genome from organisms (and non-model organisms or synthetic constructs) spanning multiple kingdoms. **PnB Designer enables design of pegRNAs for all known disease causing mutations available in ClinVar**
        
        **Nicking guides** for the PE3 and PE3b systems are designed and filtered to provide a suitable selection of gRNAs. For PE3, only nicking guides 40–100 nt up/downstream of the initial nick are considered. For PE3b, only PAM sequences on the complementary strand that partially overlap with the PE2 PAM or protospacer sequence are displayed.
        """
        )

    expander1 = st.expander("How it works")
    # Blank lines before bold headings keep them out of the preceding list.
    expander1.markdown(
        """
        **Prime editing steps (PE1):** 
        -   Generation of a single-stranded break (SSB) in the non-target strand via Cas9 H840A nickase
        -   DNA/RNA hybridization between the primer binding site (PBS) in the pegRNA and the 3′-region of the nicked strand
        -   Reverse transcriptase (RT) mediated reverse transcription of the nicked strand according to the RT template sequence, which generates a 3'-flap containing the edit
        -   Incorporation of the 3'-flap sequence in the DNA following ligation
        -   The edited DNA strand displaces the unedited 5’ flap and the resulting heteroduplex is resolved by the cell’s mismatch repair (MMR) system

        **PE3 and PE3b:**
        -   PE3 uses the PE2 Cas9 nickase-pentamutant (five mutations in RT enzyme (D200N/L603W/T330P/T306K/W313F) to increase activity, enhance binding between the template and PBS, increase processivity, and improve thermostability) RT fusion enzyme and pegRNA plus an additional simple sgRNA, which directs the Cas9 nickase to nick the unedited strand at a nearby site. The newly edited strand is then favored as the template for repair during heteroduplex resolution. The process of double nicking, however, increases indel formation slightly. Designing the sgRNA with a spacer that only binds the edited strand, as in the PE3b system, guides nicking of the unedited strand only after the edit has occurred.
        
        """
    )
    # Pillow opens files lazily; the context manager releases the handle once
    # Streamlit has consumed the image.
    with Image.open('pe1.png') as image:
        expander1.image(image, caption='Prime Editing. https://www.addgene.org/crispr/prime-edit/')
    expander1.markdown(
    """
    **Design strategy for pegRNAs:**
    -   PnB Designer scans the sense and antisense strands to find all possible 5′-NGG-3′ protospacer adjacent motif (PAM) sites around the edit position, beginning+6 nt to the 3′ end of the desired edit and then scanning 100 nt in the 5′ direction, giving the user the option to choose also very distant PAMs. 
    -   All possible NGG PAMs are stored and evaluated in respect to their distance from the edit position and the input RTT length. 
    -   A pegRNA is considered a possible candidate if the edit is fully covered by the RTT. 
    -   PnB Designer then stores the protospacer, PBS, and RTT sequences. 
    -   Nicking guides for the PE3 and PE3b systems are designed and filtered to provide a suitable selection of gRNAs. 
    -   For PE3, only nicking guides 40–100 nt up/downstream of the initial nick are considered. 

    **Design strategy for BEs gRNAs:**

    **Input:**
    -   Sequence:
        -   Upstream and downstream sequence with desired edit such as  (one of A>G, T>C, C>T, G>A)
    -   Genomic coordinates: Retrieves the genomic sequence from the selected reference genome and converts the specific variant sequence to include the SNV. 
        -   The resulting sequence is searched for **PAM (5′-NGG-3′ (SpCas9), 5′-NGA-3′ (SpCas9-VRQR), 5′-NGCG-3′ (SpCas9-NG), 5′-NNGRRT-3′ (SaCas9), 5′-NNNRRT-3′ (SaCas9-KKH), SpG 5′-NGN-3′, SpRY 5′-NRN-3′ and 5′-NYN-3′)** sites in the right distance to the SNV given the described editing windows of the Cas9-ABE and -CBE variants. 
        -   With ABEs, C → T genomic variants can be reverted by A → G conversion on the antisense strand to achieve the intended edit on the sense strand. 
        -   With CBEs, C → T and G → A conversions are possible. All previously described PAM variants with their respective editing window are tested against the edit.
        -   Editing windows are defined based on the experimental data. 
            -   ABEmax was implemented with an editing window from base 5–7, with base 1 being the most distal from the PAM site. 
            -   For BE3 (R33A/K34A), the strong sequence preference for a 5′ T next to the edit has been included as well
    """
    )

    expander2 = st.expander("References")
    expander2.write("[PnB Designer Web App](https://fgcz-shiny.uzh.ch/PnBDesigner/)")
    expander2.write("[PnB Designer Paper](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-021-04034-6)")

    expander3 = st.expander("Tool Options: All you can do with this tool")
    # NOTE(review): "SNO" in the base-editing column list looks like a typo
    # (SNV/SNP?), and that list names PBS/RTT although its example row has
    # neither — left untouched pending confirmation of the real upload format.
    expander3.write(
    """
    This tool can be run in **two modes**:
    - **Base editing mode:**
        - Does not allow A>T or G>C, dels, or insertions.
        - Only **180**/414 variants could be targeted. 
        - **Columns of interest**: Protospacer, PAM and Base Editor (the system for producing the base edit). 
        -   **There is no score**.
    - **Prime editing mode:**
        - Requires two guides: detailed in two files.
        - pegRNA oligos for cloning.
        - **Score: Higher is better**.
        - Nicking guides: the corresponding nicking guides
    
    **Input**
    -   Multiple lines provided in as a csv file in the webapp **[here](https://fgcz-shiny.uzh.ch/PnBDesigner/)** in the following format:
        - Prime editing:
            - variant, chromosome num, genomic location, Edit, gene orientation, PBS, RTT
            - Ex: rs7412, 19, 44908822, insA, +, 13, 13
        - Base editing
            - variant, chromosome num, genomic location, SNO, gene orientation, PBS, RTT
            - Ex: rs7412, 19, 44908822, C>T, +

    **Output:** A csv file
    -   Base editing  
        -   20 nt protospacer sequence (targeting the variant sequence)
        -   Edit position
        -   PAM site
        -   Suggested base editor that can target the variant
        -   Score: No score is provided
    -   Prime editing
        -   pegRNA (protospacer and extension seq for both strands)
        -   Edit position
        -   PAM and PAM strand
        -   Score (Higher the better)
        -   PBS and RTT length
        -   Nicking guides (PE3 or PE3b system) shown by selecting Nicking_guides_PE3_PE3B from the side bar.
    """
    )

    expander4 = st.expander("Scoring")
    expander4.markdown(
        """
        **Prime Editing:**  
        Primer binding site (PBS) and reverse transcriptase template (RTT) length are important parameters for successful pegRNA design. In PnB Designer, PBS and RTT lengths are by default set at suggested values of 13 nt. The pegRNA Score follows a [penalty system](https://www.nature.com/articles/s41586-019-1711-4) where **larger negative numbers** are indicative of worse pegRNA designs.  
        
        **pegRNA Score (Sum of all penalties):**
        -   Penalty system
            -   C as the first base in the 3′ extension: penalty score =  − 28
            -   T (thymine) > 4 nucleotides in the 3′ extension of the pegRNA are strongly penalized with a score =  − 50
            -   number of homologous bases after the intended edit < 5, penalty score = − 6 
            -   larger negative numbers are indicating worse pegRNA designs
        """
    )
    st.markdown("**Please note that the tool was run in Base editing and Prime editing modes. Corresponding nicking guides are also reported here.**")
    st.markdown(caution,unsafe_allow_html=True)

    target_seq = display_res(method,',','query',select_method)
    # Only chain into ForeCast-BE when display_res handed back something
    # usable; a leading 0 is treated as "nothing to forward".
    # NOTE(review): the meaning of the literal 10101010 is defined by ForeCastBE — confirm there.
    if len(target_seq)>0 and target_seq[0]!=0:
        ForeCastBE(target_seq ,method,select_method,10101010)
        

#SNP_CRISPR
def SNPcrispr(method,select_method):
    """Render the SNP-CRISPR help page followed by the stored results table.

    Draws the header, the Summary / How it works / References / Tool Options /
    Scoring expanders and the run notes, then shows the results through
    ``display_res``.

    Relies on the module-level ``caution`` string.

    Parameters
    ----------
    method : str
        Run option; appended to the header and forwarded to ``display_res``.
    select_method : str
        Forwarded unchanged to ``display_res``.
    """
    st.header("SNP_CRISPR: "+method)

    expander = st.expander("Summary")
    expander.markdown(
        """
        **SNP-CRISPR** 
        -   Facilitate identification of sgRNAs in **non-reference genomes**, **across varying genetic backgrounds** or for specific targeting of **SNP-containing alleles** (for example, disease relevant mutations). 
        -   It computes **efficiency and specificity** scores for sgRNA designs targeting **both** the **variant** and the **reference**.
        -   Can be used to design sgRNAs based on public variant data sets or user-identified variants
        -   Design sgRNAs for NGG and NAG PAM sequences
        """
    )

    expander1 = st.expander("How it works")
    # Blank lines before bold headings keep them out of the preceding list.
    expander1.write(
    """
    **Design:**  
    - SNP-CRISPR validates the input reference sequences and **warns if the submitted reference sequences does not match**, which might reflect a different version of the genome assembly being used in the user input vs. SNP-CRISPR and re-constructs the template sequence, swapping the reference nucleotide with the variant nucleotide for SNPs, while inserting or deleting the corresponding fragment for indel type variants.
    - Computes potential variant-targeting sgRNAs based on **availability of PAM (NGG or NAG) sequences** in the neighboring region. 
    - sgRNA designs that contain **four or more consecutive thymine residues**, which can result in termination of RNA transcription by RNA polymerase III, **are filtered out**.
    - Computes efficiency [Housden et al. 2015](https://pubmed.ncbi.nlm.nih.gov/26350902/) and specificity (based on BLAST results against the reference genome) scores.  

    **For identification of the best variant-specific sgRNAs, following information are provided.**
    -   Information on both sgRNAs targeting specific variants and sgRNAs targeting the reference sequence in the same region. 
    -   The efficiency score and an off-target score
    -   Positions of relevant SNPs or indels in the sgRNA are included.
    """
    )

    expander2 = st.expander("References")
    expander2.write("[SNP_CRISPR Web App](https://www.flyrnai.org/tools/snp_crispr/web/)")
    expander2.write("[SNP_CRISPR Paper](https://academic.oup.com/g3journal/article/10/2/489/6026318)")

    expander3 = st.expander("Tool Options: All you can do with this tool")
    # The example row is written in the column order announced on the line
    # above it (strand before reference).
    # NOTE(review): confirm this order against the web app's upload format.
    expander3.write(
    """
    This tool can design guides for:
    - **NGG.**
    - **NAG.**
    - **Target multiple variants within the same guide.**
    - Public variant data sets or user-identified variants.  

    **Input:** Multiple lines provided in as a (6 columns) csv file uploaded to the webapp **[here](https://www.flyrnai.org/tools/snp_crispr/web/)** in the following format:
    - variant, chromosome, position, strand, reference, variant
    - Ex: rs7412, 19, 44908822, +, C, T

    **Output:** A csv file  
    -   **Columns of interest**: 
        -   Housden Efficiency Score [Ref](https://www.ncbi.nlm.nih.gov/pubmed/26350902) (Range from 1.47-12.32 **(higher is better, > 5 recommended))**
        -   Off Target Score (Range from 0-5441.73 (lower is better, < 1 recommended))
    """
    )

    expander4 = st.expander("Scoring")
    expander4.markdown(
        """
        **Efficiency score:**
        -   Used 75 sgRNAs to target a single sequence cloned into a luciferase reporter.
        -   3 replicates using sgRNAs with 0, 1, 2, or ≥ 3 mismatches (black bars) or in the absence of sgRNA
        -   A probability matrix (of size 4(bp)x20(position in sgRNA)) is dynamically computed reflecting a cumulative P value for high efficiency of each nucleotide from position 1 to 20, with higher values representing higher efficiency.

        **Off-target (Specificity) score:**        
        -   Potential off-target loci are evaluated by performing a BLAST search of each (sgRNA) design against the species reference genome. 
        -   3 base pairs mismatch (on the 5' end of sgRNA) as a cutoff to detect an off-target
        -   An off-target score is assigned based on both the number of hits found in the BLAST results and the number of mismatched nucleotides per off-target hit.         
        """
    )
    st.markdown(caution,unsafe_allow_html=True)
    st.markdown("**Please note that the software was run for NAG and NGG PAM sequences only.**")

    display_res(method,',','Gene',select_method)

#ECRISP
def ecrisp(select_method):
    """Render the E-CRISP help page and the stored NGG results for the current variant.

    After the documentation expanders, the function reads
    ``cwd + select_method + '/' + select_method + '_NGG.csv'``, keeps the rows
    whose ``Name`` column contains the selected variant id, prefixes the name
    with the gene symbol (or ``'NaN'`` when no gene is known), and shows the
    rows in an AgGrid table. Both the filtered table and any rows ticked in
    the grid can be downloaded as CSV.

    Relies on module-level state: ``cwd``, ``variant_spl`` (one element = variant
    id only, two elements = gene followed by variant id), ``tips``, ``caution``
    and ``table_edit``, plus the helpers ``transform``, ``convert_df`` and
    ``convert_df1``.

    Parameters
    ----------
    select_method : str
        Tool name; used for the results directory, the CSV file name and the
        download file names.
    """
    st.header("E-CRISP")

    expander = st.expander("Summary")
    expander.markdown("E-CRISP is used to design gRNA sequences **(supports 12 organisms)** and can also reevaluate CRISPR constructs for on- or off-target sites and targeted genomic loci. It identifies target sequences complementary to the gRNA ending in a 3ʹ protospacer-adjacent motif (PAM), **N(G or A)G** and uses a fast indexing approach to find binding sites and a binary interval tree for rapid annotation of putative gRNA target sites.")
    expander.markdown("**Off-target** effects and target-site homology are evaluated using Bowtie2 aligner. Designs are **shown** in the output if the number of **off-targets does not exceed a user-specified threshold**. **More than one** design targeting a desired locus are **ranked** according to on-target specificity and number of off-targets.")

    expander1 = st.expander("How it works")
    expander1.markdown(
        """
        -   E-CRISP identifies target sequences ending with a PAM motif 5′-NGG/NAG-3′ and uses them to propose guide RNAs. 
        -   It uses a fast indexing approach to locate binding sites and the alignment program Bowtie 2 to identify off-target effects. 
        -   Designed sgRNAs are assessed based on genomic context (e.g. exons, transcripts, CpG islands) and ranked according to target specificity and efficiency.
        """
    )
    # The blank line before "**Output:**" keeps the heading out of the
    # preceding nested list.
    expander1.write(
    """
    **Input:**
    -   Multiple lines provided in the Input fasta sequence edit box in the webapp **[here](http://www.e-crisp.org/E-CRISP/index.html)** in the following format
        - Line1: rs12726330
        - Line2: CGGGACATGGAAGAGGTCTGGACCAGGGTACTGGGAAGGCGCTCGGAGGA
        - Line3: rs76763715
        - Line4: CCAGCCGACCACATGGTACAGGAGGTTCTAGGGTAAGGACAAAGGCAAAG
        - and so on

    **Output:**
    -   A tab separated .tab file
        - **Columns of interest**:
            -   sgRNA Length
            -   Efficiency Score (E Score, **Higher the better**) [Ref](https://www.nature.com/articles/nbt.3026) 
            -   Specificity Score (S Score, **Higher the better** (max = 100))
            -   Doench and Xu Score
            -   Nucleotide sequence (A, C, G, T) compositions in %
    """
    )

    expander2 = st.expander("References")
    expander2.write("[E-CRISP Web App](http://www.e-crisp.org/E-CRISP/)")
    expander2.write("[E-CRISP Paper](https://www.nature.com/articles/nbt.3026)")

    expander3 = st.expander("Tool Options: All you can do with this tool")
    expander3.write(
    """
    This tool offers single or paired sgRNA and:
    - **Options for PAM:**
        - **Relaxed**
        - **Medium:**
        - **Strict** 
    - **Options for Design:**
        - knockdown.
        - knockin.
        - N/C terminal tagging.
        - CRISPRi.
        - CRISPRa.
    - **Other filtering options.**
        -   gRNA length, allowed % of G, C, A and T, 3' and 5' flanking sequence length, off-targets evaluation etc
    """
    )

    expander4 = st.expander("Scoring")
    expander4.markdown(
        """
        E-CRISP utilises its own **SAE (Specificity, Annotation, Efficacy) score** to determine the quality of each sgRNA in addition to Rule Set 1 [Doench et al](https://www.nature.com/articles/nbt.3026) and [Xu et al](https://genome.cshlp.org/content/25/8/1147). Please see Scoring and Quality Matrices in README tab of this app for details.
        -   Specificity Score (S-score):
            -   Start with 100.
            -   For every off-target, subtract (20-mismatches)/iteration.
        -   Annotation Score (A-score):
            -   Start with zero
            -   For every hit exon add 5/exon count
            -   For every hit CpG Island subtract 1
            -   For every start codon hit add 1
            -   For every stop codon hit add 1
            -   For every CDS hit add 5/CDS count
            -   For every gene hit add 1
        -   Efficacy Score (E-score):
            -   Add 1 if last 6 bp have a GC content higher than 70 %
            -   Subtract 1 if the entire sequence has GC content > 80 %
            -   Add 1 if sequence is preceded by a G
            -   Add 1 if there are GG in front of the target sequence (opposite the PAM)
            -   Add micro-homology score (is higher when sequence tends to give out of frame deletions)
        """
    )

    st.markdown(tips,unsafe_allow_html=True)
    st.markdown("**Please note that the results reported here are for PAM=NGG**")
    st.markdown(caution,unsafe_allow_html=True)

    fnm=cwd+select_method+'/'+select_method+'_NGG'+'.csv'
    data = pd.read_csv(fnm, sep=',')

    # variant_spl holds either [variant] or [gene, variant].
    if len(variant_spl) > 1:
        gene_label, snp_key = variant_spl[0], variant_spl[1]
        results_title = f"**Results for SNP: {variant_spl[1]} on GENE: {variant_spl[0]}**"
    else:
        gene_label, snp_key = 'NaN', variant_spl[0]
        results_title = f"**Results for SNP: {variant_spl[0]} on GENE: NAN**"

    # Match the id literally (not as a regular expression) and treat missing
    # names as "no match" so the mask is strictly boolean.
    name_matches = data['Name'].str.contains(snp_key, regex=False, na=False)
    # Work on a copy: assigning a column on a filtered slice triggers pandas'
    # SettingWithCopyWarning and is not guaranteed to stick.
    data_snp = data[name_matches].copy()
    data_snp['Name'] = gene_label+':'+data_snp['Name']
    data_snp.reset_index(drop=True, inplace=True)

    if data_snp.shape[0]>0:
        df = transform(data_snp,'Please Select columns to save whole table')
        csv = convert_df(df)

        st.download_button(
            label="Download Table as CSV file",
            data=csv,
            file_name=select_method+'_'+variant_spl[0]+'.csv',
            mime='text/csv',
        )

    # Written out explicitly instead of relying on Streamlit "magic" for a
    # bare f-string expression.
    st.markdown(results_title)

    st.markdown(table_edit,unsafe_allow_html=True)
    gb = GridOptionsBuilder.from_dataframe(data_snp)
    gb.configure_pagination(enabled=False)
    gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    gb.configure_selection(selection_mode="multiple", use_checkbox=True)
    gb.configure_side_bar()
    gridOptions = gb.build()

    grid_response = AgGrid(
        data_snp,
        height=200,
        gridOptions=gridOptions,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    # Depending on the grid component version the selection arrives as a list
    # of dicts, a DataFrame or None; normalise to a DataFrame so the emptiness
    # check never evaluates an ambiguous truth value.
    selected = grid_response['selected_rows']
    dfs = pd.DataFrame(selected) if selected is not None else pd.DataFrame()
    if not dfs.empty:
        st.write('Selected rows')

        # The first column of the selection is dropped before display/export,
        # as before.
        selected_view = dfs[dfs.columns[1:]]
        st.dataframe(selected_view)

        dfs1 = transform(selected_view,'Please select columns to save selected Table')
        csv = convert_df1(dfs1)

        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=select_method+'_'+variant_spl[0]+'.csv',
            mime='text/csv',
        )

    
#ForeCast-BE results
def display_ForeCast_BE_res(data_snp,method,variant_spl):
    """Show the table-usage hint and an interactive AgGrid view of ``data_snp``.

    ``method`` and ``variant_spl`` are accepted but not read by this function.
    Nothing is returned; the grid's response is discarded.
    """
    # Hint text explaining sorting / row selection, rendered as raw HTML.
    st.markdown(table_edit, unsafe_allow_html=True)

    # Grid configuration: no pagination, pivot/value/group enabled on every
    # column, multi-row checkbox selection and the side bar.
    builder = GridOptionsBuilder.from_dataframe(data_snp)
    builder.configure_pagination(enabled=False)
    builder.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    builder.configure_selection(selection_mode="multiple", use_checkbox=True)
    builder.configure_side_bar()

    grid_kwargs = dict(
        height=200,
        gridOptions=builder.build(),
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%',
    )
    AgGrid(data_snp, **grid_kwargs)


#def display_res(method,sep,rsid,select_method):
def display_res(method,sep,rsid,select_method):
    """Load one tool's result table, show the rows for the selected variant and offer CSV downloads.

    Reads ``cwd + select_method + '/' + select_method + '_' + method + '.csv'``,
    keeps the rows whose ``rsid`` column contains the SNP id taken from the
    module-level ``variant_spl``, prefixes that column with the gene name
    (``'NaN'`` when ``variant_spl`` has a single component) and renders the
    result in an AgGrid table. Download buttons are offered for the whole
    table and for the rows ticked in the grid.

    Parameters:
        method: option name; part of the CSV path and of the download file names.
        sep: field separator handed to ``pd.read_csv``.
        rsid: name of the column that holds the variant identifier.
        select_method: tool name; part of the CSV path and used to choose the
            returned column.

    Returns:
        ``data_snp['Outp_seq']`` for ``'BE-DICT'`` or ``data_snp['Protospacer']``
        for ``'PnB Designer'`` when at least one row matched; ``[0]`` in every
        other case.
    """
    fnm = cwd+select_method+'/'+select_method+'_'+method+'.csv'
    data = pd.read_csv(fnm, sep=sep)

    # variant_spl is either [gene, snp] or just [snp].
    if len(variant_spl) > 1:
        gene, snp_id = variant_spl[0], variant_spl[1]
    else:
        gene, snp_id = 'NaN', variant_spl[0]

    # regex=False: the id is literal text, not a pattern.
    # na=False: rows with a missing id are treated as non-matching instead of
    # producing a non-boolean mask that cannot be used for indexing.
    matches = data[rsid].str.contains(snp_id, regex=False, na=False)
    # .copy() so the column assignment below writes to an independent frame
    # rather than to a slice of `data` (avoids SettingWithCopyWarning).
    data_snp = data[matches].copy()
    data_snp[rsid] = gene+':'+data_snp[rsid]
    data_snp.reset_index(drop=True, inplace=True)

    if data_snp.shape[0]>0:
        # transform() receives the table and a prompt; its result is what gets exported.
        df = transform(data_snp,'Please Select columns to save whole table')
        csv = convert_df(df)

        st.download_button(
            label="Download Table as CSV file",
            data=csv,
            file_name=method+'_'+variant_spl[0]+'.csv',
            mime='text/csv',
        )

    # Bare string expressions: displayed on the page by Streamlit "magic".
    if len(variant_spl) > 1:
        f"""
            **Results for SNP: {variant_spl[1]} on GENE: {variant_spl[0]}**
        """
    else:
        f"""
            **Results for SNP: {variant_spl[0]} on GENE: NAN**
        """

    st.markdown(table_edit,unsafe_allow_html=True)
    gb = GridOptionsBuilder.from_dataframe(data_snp)
    gb.configure_pagination(enabled=False)
    gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    gb.configure_selection(selection_mode="multiple", use_checkbox=True)
    gb.configure_side_bar()
    gridOptions = gb.build()

    grid_response = AgGrid(
        data_snp,
        height=200,
        gridOptions=gridOptions,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    selected = grid_response['selected_rows']
    # Explicit emptiness test: works for a list, a DataFrame or None, whereas
    # plain truthiness raises on a DataFrame.
    # NOTE(review): which of these the grid returns depends on the installed
    # st_aggrid version - confirm.
    if selected is not None and len(selected) > 0:
        st.write('Selected rows')

        dfs = pd.DataFrame(selected)
        # The first column of the selection frame is left out of both the
        # preview and the export (presumably grid bookkeeping - TODO confirm).
        picked = dfs[dfs.columns[1:dfs.shape[1]]]
        st.dataframe(picked)

        dfs1 = transform(picked,'Please select columns to save selected Table')
        csv = convert_df1(dfs1)

        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=method+'_'+variant_spl[0]+'.csv',
            mime='text/csv',
        )

    if data_snp.shape[0] > 0:
        if select_method=='BE-DICT':
            return  data_snp['Outp_seq']
        if select_method=='PnB Designer':
            return  data_snp['Protospacer']
    # No matching rows, or a tool without a sequence column to hand back.
    return [0]




# Reusable HTML snippets for on-page notices.

# Red warning about variant coverage.
caution = '<p style="font-family:sans-serif; color:Red; font-size: 18px;">Please note that not (necessarily) all variants are targeted.</p>'

# Green heading for the tool-tips section.
tips = '<p style="font-family:sans-serif; color:Green; font-size: 18px;">Important Tool Tips:</p>'

# Green hint rendered above each results grid via st.markdown(..., unsafe_allow_html=True).
table_edit = '<p style="font-family:sans-serif; color:Green; font-size: 16px;">About Table: Please note that the table can be <b>sorted by clicking on any column</b> and <b>Multiple rows can be selected</b> (by clicking the check box in the first column) to save only those rows.</p>'



# Page header and the "what's new" expander; this runs before the sidebar mode
# is evaluated, so it is shown in both modes.
st.title('Single Base Editors')
st.markdown('**Please select an option from the sidebar**')
expander = st.expander("NEW TOOLS CURRENTLY AVAILABLE/COMING SOON IN THE APP")
# The blank line before "Coming Soon." ends the preceding bullet list; without
# it markdown folds that heading into the last bullet.
expander.markdown(
    """
    **NEW: PLEASE NOTE THAT**  
    -   **[FORECasT-BE](https://partslab.sanger.ac.uk/FORECasT-BE) is now part of the App to predict guide efficiency for ABEs and CBEs**  
        -   ***It can only be invoked from the local version of the app, please download the app and run it on your local machine to use this feature.***
        -   **Available under BE-DICT and PnB Designer options**
    -   **Another Prime Editing tool [PE-Designer](http://www.rgenome.net/pe-designer/) is also part of the app**

    **Coming Soon.**
    -   **We are also testing a prime editing efficiency calculator [MinsePIE](https://github.com/julianeweller/MinsePIE) and will be available soon**
    """
    )


# Sidebar: logo on top, then the unlabeled radio that switches between the
# ReadME page and the tool-selection page.
st.sidebar.image("logo-card-white.png", use_column_width=True)

Calc = st.sidebar.radio(label="", options=('ReadME', 'Tools Selection Menu'))

#if Calc:

#st.sidebar.title("Selection Menu")
if Calc == 'ReadME':
    #st.markdown("[Introduction](#Introduction)")    
    #st.markdown("[How do base editors work](#How-do-base-editors-work)")
    
    expander = st.expander("How to use this app")   
    #st.header('How to use this app')
    expander.markdown('Please note that all tools were run using Human Genome **(hg38)**. Each tool require **specific input format** (described for each tool selected from the sidebar when **Tools Selection Menue is enabled**) and **output results** in different formats **(with different columns based on method selected as described under each tool)**. Some of these tools also allow selection of various **endonucleases and related options**, their **reulsts are provided as radio controls** in the sidebar of this app under each tool.')
    expander.markdown('**Requirements:** 1) Python3.4 or higher and 2) streamlit 1.13')
    expander.markdown('To start this app, **unzip** the base_editor_app.zip in a folder of your choice')
    expander.markdown('Open shell terminal and **cd to base_editor_app folder**')
    expander.markdown('Type: **streamlit run baserditorsV3.py**, It will launch baseeditor app in the default browser')
    expander.markdown('**By default** README radio button is enabled to describe general information about the App and How to use it.')
    expander.markdown("- Please enable **Tools Selection Menu** radio control in the sidebar **to enable variant, tool and endonuclease options**")
    expander.markdown("- Select Desired Variant from the dropdown list")
    expander.markdown("- Select a Tool")
    expander.markdown("- Select one of the options **(if available)**")

     
    expander1 = st.expander('Introduction')
    
    expander1.markdown(
        """**TLDR**  
            This app **reviews** popular single base quality estimators for a **[list](https://drive.google.com/file/d/1Sxb-Cc-epbs6vujQaX9wa5acqus0RW3q/view?usp=sharing) of rsIDs** per disease of interest based on CARD’s cross-NDD efforts. We filtered our candidate list of **base edit predictors** for those that are at least **semi-automated and reproducible** (no copy and pasting IDs or sequences one at a time).
            """
            )
    expander1.markdown('Clustered Regularly Interspaced Short Palindromic Repeat CRISPR/CRISPR-associated (Cas) systems, such as **Cas9 (type II endonuclease which recognises the 5"'"-NGG-3"'" PAM)** and **Cas12a (type V endonuclease which recognises the 5"'"-TTTV-3"'" PAM)** (also called Cpf1), are the primary tools used for genome editing. CRISPR/Cas9 based gene editing uses sequence-specific nucleases (Cas9 etc) and a sgRNA for precise gene knock-out/in whereas catalytically inactive Cas9 (dCas9) provides gene expression regulation via activation/inhibition (CRISPRa/i) and Cas9 nickase (nCas9) + sgRNA, by incorporating deaminases, enables single base editing. Finally nCas9 + prime editing gRNA (pegRNA) enables editing of all 12 possible base edits')
    expander1.markdown('**A CRISPR/Cas9 sytem** requires a custom single guide RNA (sgRNA) that contains a crRNA (a 20 nt sequence homologous to the region of interest that direct Cas9 (or dCas9 or Cas9 nickase) nuclease to the region of interest) and a Cas9 nuclease-recruiting sequence (tracrRNA). An ideal gRNA should maximize on-target activity **(cleavage efficiency)** while also minimizing potential off-target effects **(specificity)**.')
    expander1.markdown('**Current sgRNA design tools** (including HDR based and deaminase based) fall under three major categories:')
    expander1.markdown('- **Alignment-based:** candidate gRNAs are aligned and retrieved from the given genome by locating PAM')
    expander1.markdown('- **Hypothesis-driven/Rule-based:** sgRNA activity is predicted according to empirically derived, handcrafted rules (GC content, sequence preference etc)')
    expander1.markdown('- **Learning-based:** sgRNAs are scored by models trained on datasets of CRISPR experiments.')
    
    expander1.markdown('**Two categories of DNA base editors (BEs) are:**')
    
    expander1.markdown('- Cytosine base editors **(CBEs: C/G -> T/A converters)** and')
    expander1.markdown('- Adenine base editors **(ABEs: A/T -> G/C converters)** as shown in Figure below')
    image = Image.open('CBE_ABE.webp')
    expander1.image(image, caption='Cytosine and Adenine base editors. Figure from: https://www.nature.com/articles/s41573-020-0084-6')
    expander1.markdown('**Prime Editors (PEs)**')
    expander1.markdown('While base editors can only introduce 4 edits, PEs on the other hand can do all 12 edits using usual Cas9 (and its variants) and a gRNA called prime editing guide RNA (**pegRNA**). PEs use pegRNA consisting of a 20 nt guide sequence, a primer binding site (PBS) and a reverse transcriptase template (RTT). The guide directs the Cas enzyme to a target site, the PBS hybridizes to the opposite strand to prime the reverse transcriptase, and the RTT integrates the desired genomic alteration.')
    expander1.markdown('In this app we also tested a **prime editor** and an **RNA editor for gene knockdown** for these targets.')

    expander2 = st.expander('How does CRISPR-Cas9 (and base editing) System works')
    expander2.markdown('**CRISPR-Cas9** system consists of **two** key components (accomplishing three steps: Recognition, Cleavage, and Repair):')
    expander2.markdown("- **Recognition:** A single guide RNA (sgRNA which is composed of target-specific CRISPR RNA (crRNA) and an auxiliary trans-activating crRNA (trcrRNA) joined by linker loop) targeting Cas9 to a specific DNA locus")
    #expander2.markdown('- **Recognition:** A guide RNA (gRNA) that consists of a small piece of pre-designed RNA sequence (usually 20 bases complimentary to the target DNA sequence in the genome) and **guides** Cas9 to the right part of the genome.')
    expander2.markdown('- **Cleavage, and Repair**: A Cas9 enzyme (has six domains, REC I (responsible for binding guide RNA), REC II, Bridge Helix, PAM Interacting (confers PAM specificity and is responsible for initiating binding to target DNA), HNH and RuvC (each cut single-stranded DNA after 3rd base upstream of PAM)) that acts as a pair of ‘molecular scissors’ that **cut** the two strands of DNA at a specific location in the genome **so that bits of DNA can then be added or removed** using either non-homologous end joining **(NHEJ)** or homology-directed repair **(HDR)**.')
    expander2.markdown('CRISPR-Cas9 system, while efficient at knocking out genes, it is very inefficient at introducing single base changes and oftenly introduces random insertions and deletions **(indels)** during double-stranded breaks (DSB) repair. **Base editors** on the other drives specific, accurate, and permanent single nucleotide changes without introducing double-stranded DNA breaks.')
    expander2.markdown("**Base editing requires three elements:**")
    expander2.markdown("- A single guide RNA (sgRNA) for ABEs and CBEs or pegRNA for PEs")# which is composed of target-specific CRISPR RNA (crRNA) and an auxiliary trans-activating crRNA (trcrRNA) joined by linker loop) targeting Cas9 to a specific DNA locus")    
    expander2.markdown("- A Cas nickase (Cas9 with mutation in RuvC nuclease domain, which enables it to nick but not cleave DNA) or Cas fused to a deaminase that makes the edit.")
    expander2.markdown("- A target base for editing within the editing window specified by the Cas9 protein")
    #expander2.markdown('The Cas9 protein has six domains, REC I (responsible for binding guide RNA), REC II, Bridge Helix, PAM Interacting (**confers PAM specificity and is responsible for initiating binding to target DNA**), HNH and RuvC (**each cut single-stranded DNA after 3rd base upstream of PAM**). Cas9 and its variants are highly specific to various PAM sequences and have two endonuclease domains: the n-terminal RuvC-like nuclease domain and the HNH-like nuclease domain near the center of the protein')
    expander2.markdown('A whole range of **CBEs** and **ABEs** have been developed. Various CEBs ranging from **simple** deactivated Cas9 (dCas9)+cytidine deaminsae+uracil DNA glycosylase inhibitor (UGI) to improved single mutated Cas9 (nCas9)+cytidine deaminsae+uracil DNA glycosylase inhibitor (UGI) called BE3 systems and its variants such as Target-AID editors were developed. 4th generation BEs (called BE4, such as BE4max etc which focus on improving editors delivery to the nucleus) further minimize undesired base conversions that can happen with BE3.') 
    expander2.markdown('Similar to CBEs, **Adenine base editors (ABEs)** such as ABEmax, ABE4max, ABE8e and ABE8s were also developed.')
    #st.markdown('Similar to CBEs, adenine base editor such as ABEmax, ABE4max, ABE8e and ABE8s were also developed.')
    #st.markdown("**Key parameters for a good BE are:**")
    #st.markdown("- Editing efficiency: 4th generation base editiors **BE4max and ABE4max [2](https://www.nature.com/articles/nbt.4172), ABE8s [3](https://www.nature.com/articles/s41587-020-0491-6) and Target-AID (dual base) [4](https://www.nature.com/articles/s41587-020-0535-y)**")
    #st.markdown("- Editing efficiency")
    #st.markdown("- Minimal off-target effects")
    expander3 = st.expander("Scoring and Quality Matrices")
    expander3.markdown(
        """
        An ideal CRISPR/Cas9 (and its variants including base and prime editors) system should employ sgRNA (and pegRNA) that **maximize** on-target activity **(efficiency)** and **minimize** potential off-target effects **(specificity)**. Balancing these two requirements can be challenging. Various sgRNA design tools **assist** in the selection of the best target sites available by **excluding undesirable targets based on predicted low efficiency or specificity**, saving resources and time in the experiment.
    
        **Efficiency (On-Target Activity):**
        
        Ideally, CRISPR/Cas9 protein scans the PAM sequence, and sgRNA (spacer sequence complementary to the targeted DNA sequence) recognises target loci and activates endonuclease activity to cleave specific sites. **sgRNA Cleavage efficiency**, however, varies greatly among different target sites and/or cell lines and **depends on several factors** including sgRNA sequence (sequence composition, nucleotide position, GC content), genetic and epigenetic features etc.
        
        Most sgRNA design tools employ a **combination of features** for efficiency scoring. Broadly, these tools fall into two categories.
        - **Hypothesis-driven (rule-based) tools:** These tools employ **simple** sgRNA sequence metrics such as GC content and position dependent specific sequence motifs. **Enhanced** sgRNA based metrics such as sequence, structural, and chromatin characteristics are also adapted in various tools.
        - **Machine-learning (and Deep-learning) tools:** Cleavage efficiency of sgRNA is a complex interplay of factors such as target sequence, cellular environment and experimental conditions. Therefore, simple rule-based system may not be adequate for choosing target sites and designing CRISPR gRNAs. Machine learning (deep-learning) tools employ different hand-curated (auto-detected) **feature sets** including sgRNA sequence, PAM, and/or adjacent nucleotides and window size on the target sequence to design sgRNA.
    """
    )
    expander3.markdown(
        """   
        **Efficiency models used in tools discussed in this App**
        
        **An sgRNA activity predictive model (An L1-regularized linear support vector machine - SVM) [Doench et. al 2014](https://www.nature.com/articles/nbt.3026):** ***Rule set 1*** 
        
        Based on sequence features of 1,841 sgRNAs (targeting all possible target sites in six endogenous mouse and three endogenous human genes), a predictive model of sgRNA activity was developed, as described below, to improve sgRNA design for gene editing and genetic screens. Model was trained using a total of 586 sequence and GC features as explained below.
        -   Ranking sgRNAs from 0 (worst) to 1 (best).
        -   Total Features: 586
            -   Sequence: 30bp sequence(4bp upstream + 20bp sgRNA protospacer + 3bp PAM + 3bp downstream) 
                -   Single nucleotide Features: 120 
                    -   4 bases (A,C,G,T) × 30 bp sequence
                -   Dinucleotide Features: 464
                    -   16 dinucleotides × 29 positions (of 30 bp sequence)
                -   2 GC-count features in the 20 nt of the sgRNA 
                    -   Deviations of GC-count below and above ten nucleotide
        -   sgRNA score:
            -   Assuming w_{ij} are model weights for the features i for a particular guide s_{j} and the intercept int. Then the sgRNA score f(s_{j}) is given via logistic regression as:
        """
    )
    expander3.markdown(latext1)
    
    expander3.markdown("After validation, final trained model using all available data used only 72 of the 586 features, including both GC-count features.")
    
    expander3.markdown(
        """                
        **Linear Predictive (sequence) model** (CRISPR/Cas9 knockout and CRISPRi/a) [Xu et al 2015](https://genome.cshlp.org/content/25/8/1147): 
        
        Models for predicting sgRNA efficiency for CRISPR knockout and CRISPRi/a based on systematic assessment of the effect of sequence (sgRNA protospacer, DNA target and downstream of DNA target) context based on six published data sets.
        -   Identification of efficient and inefficient sgRNAs in published data sets
            -   Essential genes: Genes whose deletion resulted in a growth disadvantage in genome-wide knockout experiments.
                -   Efficient sgRNAs:  all sgRNAs targeting these essential genes (two fold depletion).
                -   Inefficient sgRNAs:  all sgRNAs targeting these essential genes (very low depletion w.r.to positive control)
            -   Computed log odds ratio of (40 bp sequences (aligned at the PAM) including the 19-bp or 20-bp spacer targets as well as their 3′ and 5′ flanking DNAs) **nucleotide frequency between DNA sequences targeted by efficient and inefficient sgRNAs**.
        -   Feature selection (sgRNA nucleotides): 
            -   Signs of the odds ratios are concordant
            -   Magnitudes of the odds ratios are above a threshold (computed from statistical significance analysis) in all three sgRNA sets.
        -   Elastic-Net model was used to identify dominating nucleotide features: 28 sequence features were identified.        
        """
    )
    expander3.markdown(
        """     
                       
        **An sgRNA activity model [Moreno et. al 2015](https://www.nature.com/articles/nmeth.3543) (CRISPR/Cas9/Cas9 nickase Knock-in):** 
        
        
        - Sequence: 35 bp (6nt upstream flanking + 20nt sgRNA + 3nt PAM + 6nt downstream flanking)
        -   Total Features: 684
            -   Mononucleotides Features: 140
                -   4 bases (A,C,G,T) × 35 nt (6nt upstream, 20nt sgRNA, 3nt PAM, 6nt downstream)
                -   Dinucleotide Features: 16 dinucleotides × 34 positions (of 35 bp sequence)
                -   Using randomized logistic regression on 684 features with regularization and **selected 91 features**.
        
        """
    )
    expander3.markdown(
        """
        **An sgRNA activity predictive model [Doench et. al 2016](https://www.nature.com/articles/nbt.3437): Rule set 2**
        
        **Using sgRNA design [Rule set 1](https://www.nature.com/articles/nbt.3026)** to create human and mouse genome-wide libraries, performed positive and negative selection screens to derive additional rules for improved sgRNA design. Also developed a metric to **predict off-target sites**.
        -   **Features used (using one-hot encoding).** 
            -   Position Specific features: 80 order 1 and 320 order 2 
            -   Position-independent features: 4 order 1 and 16 order 2 
        -   **GC Count Features:**
            -   Number of Gs and Cs in the 20 mer
            -   Number of Gs and Cs >10 
        -   **PAM (NGGN) Features:**
            -   16 features for the two nucleotides in the N and N positions
        -   **Thermodynamic features:** Melting temperatures of the DNA version of the RNA guide sequence of the
            -   Entire 30-mer (target site plus context)
            -   5 nucleotides immediately proximal to the PAM
            -   8 nucleotides adjacent to that (away from the PAM)
            -   5 nucleotides in turn adjacent to the 8 mer (again, away from the PAM)
            
        **A deep-learning-based (Convolutional Neural Network - CNN) based DeepCpf1 model for Cas12a/Cpf1 RNA [Kim et. al 2017](https://www.nature.com/articles/nmeth.4104):** 
        
        An automated feature engineering and prediction algorithm (based on data from 16,292 (experiment A) and 2,963 (experiment B) target sequences and 20-nt guide sequences) that automatically learn informative representations of target sequences relevant to Cpf1 activity profiles.
        -   Input: 34-bp target sequence
        -   One-hot encoding input layer: Each nucleotide (A, C, G, and T) is assigned a row, so that a 4-by-34 dimensional binary matrix represents the whole 34-bp target sequence.
        -   Convolution layer: 80 filters of length 5 and rectified linear unit (ReLU) to the convolution outputs.
        -   The pooling layer: Computes the average in each of the non-overlapping windows of size 2, providing invariance to local shifts and reducing the number of parameters.
        -   Three fully connected layers of 80, 40, and 40 units, chromatin accessibility integration layer and regression output layer.
        -   The chromatin accessibility integration layer: It incorporates the sequence representations with the chromatin accessibility information of the target sequence
        """
    )
    #expander3.markdown('Model scores f(s_{j}) will fall into the range [0,1], and higher values predict higher activity')
    expander3.markdown('**Specificity (Off-Target Activity)**')
    expander3.markdown('Cas9 nucleases often cleaves unintended genomic sites due to sgRNAs recognising DNA sequences with a few mismatches (off-target cleavage). Two main methods are used to predict cutting Specificity of sgRNA:')
    expander3.markdown('- **Alignment-based methods:** sgRNAs are aligned to a given genome (often using traditional alignment tools such as Bowtie or BWA) and off-target sequences and sites are returned. This method is mainly used to find out all potential off-targets ***in-silico***')
    expander3.markdown(
    """        
        - **Scoring-based methods:** sgRNAs are **further ranked** using identified off-targets from alignment process:
            -   **Hypothesis-driven:** off-targets are scored based on the contribution of specific genome context factors (features such as those influencing the nonspecific binding of sgRNAs) **(MIT score)** and cutting frequency determination **(CFD score)** as sgRNA can also bind genome loci with non-canonical PAMs such as NAG, NCG and NGA.
            -   **Learning-based:** sgRNAs are scored and predicted from a training model that considers the different features (including PAM type, nucleotide composition, GC content, chromatin structure, DNA methylation, RNA secondary structure, etc) affecting specificity.
        - **An algorithm to predict off-target loci for specificity analyses [Hsu. et. al 2013](https://www.nature.com/articles/nbt.2647):**
            -   Generally, specificity of SpCas9-mediated DNA cleavage is sequence- and locus-dependent and governed by the quantity, position and identity of mismatching bases.
            -   Design guidelines to minimize off-target cleavage: 
                -   Potential ‘off-target’ genomic sequences: should follow these four constraints:
                    -   should not be followed by a PAM with either 5′-NGG or 5′-NAG sequences
                    -   Their global sequence similarity to the target sequence should be minimized, and guide sequences with genomic off-target loci that have fewer than three mismatches should be avoided
                    -   At least two mismatches should lie within the PAM-proximal region of the off-target site. 
                    -   A maximal number of mismatches should be consecutive or spaced less than four bases apart.
                **Off-target scoring.**
        -   **Off-target mismatch [Stemmer et. al 2015](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0124633) score [Doench et. al 2016](https://www.nature.com/articles/nbt.3437):** a value of 0 indicates no predicted off-target activity whereas a value of 1 indicates a perfect match
            -   Off-target search
                -   The search of off-target sites is carried out using Bowtie
            -   For each off-target site of any sgRNA a score is computed that indicates the likelihood of a stable sgRNA/DNA heteroduplex and is quantified as in Eq. 1:
            -   sgRNA target site score: The list of sgRNA target sites is ranked according to the number of predicted off-target sites and their potential deleterious effects on the respective off-target gene. The ranking is based on a single score that combines the number of off-target sites, the distribution of their mismatches and the distance to the closest annotated exon (Eq 2).
        - 
        """
    )
    expander3.markdown(off_target)
    expander3.markdown("where pos is the position of each mismatch counted from the 5' end")
    expander3.markdown(sgran_target_score)

    #expander3.markdown('- An important feature of the SpCas9 system is the PAM, which is a CRISPR-dependent and conserved DNA sequence motif adjacent to the target site, and is used by bacteria to distinguish between self and non-self DNA (24). Therefore, target recognition requires both base pairing to the gRNA sequence and the presence of the PAM (i.e. 5′-NGG-3′) adjacent to the targeted sequence (1). When the Cas9 binds with PAM and the target site pairs with the gRNA, a double-strand break (DSB) is caused between positions 17 and 18 of the 20-nt gRNA sequence (Figure 3) (1). Following the break, random insertions or deletions (indels) can be generated via the non-homologous end-joining (NHEJ) pathway, which is error-prone (gene knockout). Alternatively, a desired modification can be introduced through homology-directed repair (HDR) when provided with a DNA template (gene knock-in) (30,31).')
    #expander3.markdown('- The efficiency of DNA cleavage depends not only on the intrinsic nuclease activity, but also on target site accessibility and the affinity of DNA binding domain(s) (e.g. gRNA) to the target sequence. However, there is a lack of understanding on the exact behavior of the engineered Cas9 nuclease in living cells, especially regarding the dynamics of its interaction with DNA, and the cell cycle-dependent cleavage activity. Due to the limited biological knowledge, prediction of nuclease target accessibility and cleavage rates in living cells remains difficult. Therefore, experimental validation of target-site selection is necessary. Computational approaches can analyze and extract knowledge from large-scale CRISPR screens. Thus, they can help identify gRNA features modulating Cas9 activity as well as make plausible hypotheses regarding its mechanism of action.')
    #expander3.markdown('- gRNA sequence features')
    #expander3.markdown('- Protospacer adjacent motif')
    #expander3.markdown('- gRNA sequence motifs')
    #expander3.markdown('- Overall nucleotide usage, Position-specific nucleotide composition and Structural features')
    
    #expander3.markdown('TOOLS FOR GUIDE EFFICIENCY PREDICTION: ')
    expander4 = st.expander("CRISPR-Cas9 and Base editor tools reviewed")
    #st.header('CRISPR-Cas9 and Base editor tools reviewed')
    expander4.markdown(
    """
    We have reviewed a total of ***6 tools*** in the public domain which are **at least semi-automated and reproducible** (no copy and pasting IDs or sequences one at a time). These tools offer a wide range of options ranging from **HDR** based edits to improved **single base editors** to precise base editing such as **Prime editing**. Furthermore, many of these tools offer **variety of PAM sequences expanding the number of available target sites for base editing.**
    
    - [BE-DICT](http://130.60.24.130/page-set?actionID=5f8c494b8c854d0029ffa9d3)
        - An attention-based deep learning algorithm for base editing outcome prediction [Paper](https://www.nature.com/articles/s41467-021-25375-z).
        - Options: ABE8e, ABEmax, BE4max, Target-AID.
    - [ChopChop](https://chopchop.cbu.uib.no)
        - This tool offers various endonucleases (Cas9, nCas9, Cpf1 (also known as Cas12a and **only contains crRNA**), CasX (generates staggered double-stranded break) and the **Cas13 (also known as C2c2) RNA editor**) and PAM options. Results for the following options are reported in this app:
        - Cas13a, CasX_TTCN, Cpf1_TTN, NGG (Cas9), Nickase_NGG, Nickase_NRG.
    - [E-CRISP](http://www.e-crisp.org/E-CRISP/)
        - This tool offers relaxed, medium and strict options for PAM sequence. 
    - [GuideScan2](https://guidescan.com)
        - This tool offers Cas9 and Cpf1 endonucleases with various options to filter out results. Results based on 30, 40 and 50bp (SNP location = (n/2)bp) input sequence range (for Cas9 and Cpf1) are reported in this app:
        - 30bp_cpf1, 30bp_NGG, 40bp_cpf1, 40bp_NGG, 50bp_cpf1, 50bp_NGG
    - [PnB Designer](https://fgcz-shiny.uzh.ch/PnBDesigner/)
        - This tool allows base editing as well as **prime editing**. Results reported in this app are based on:
        - Base_editing_guides, Nicking_guides, pegRNA_oligos
    - [SNP_CRISPR](https://www.flyrnai.org/tools/snp_crispr/web/)
        - This tool offers guides for NGG and NAG PAM sequences; results for both are reported in this app:
        - NGG, NAG
    **For more details on each tool, please select it from the sidebar menu under Tools Selection Menu**
    """
    )


else:
#if Calc == 'Selection Menu':
    #ReadMe = st.sidebar.checkbox('ReadME',value=False)
    # ------------------------------------------------------------------
    # Sidebar, part 1: pick the variant to inspect.
    # `variant_spl` holds the whitespace-separated pieces of the chosen
    # entry; it is not used again in this section.
    # ------------------------------------------------------------------
    select_variant = st.sidebar.selectbox("Please select variant", variants_s)
    variant_spl = select_variant.split()

    # ------------------------------------------------------------------
    # Sidebar, part 2: one checkbox per tool. When a box is ticked, the
    # tool's label is recorded in `select_method_*` and a radio group with
    # that tool's options is created right after the checkbox.
    # The `select_method_*` / `method_*` names are bound ONLY when the
    # corresponding box is ticked.
    # ------------------------------------------------------------------
    st.sidebar.write('Please select A method')
    _radio_prompt = "Please select an option"

    # BE-DICT
    BE_DICT = st.sidebar.checkbox('BE-DICT', key=1)
    if BE_DICT:
        select_method_BEDICT = 'BE-DICT'
        method_bedict = st.sidebar.radio(
            _radio_prompt,
            (
                'bystander_ABE8e_mean',
                'bystander_ABEmax_mean_5',
                'bystander_BE4max_mean',
                'bystander_Target-AID_mean',
            ),
        )

    # ChopChop
    ChopChop = st.sidebar.checkbox('ChopChop', key=2)
    if ChopChop:
        select_method_ChopChop = 'ChopChop'
        method_chopchop = st.sidebar.radio(
            _radio_prompt,
            (
                'Cas13a',
                'CasX_TTCN',
                'Cpf1_TTN',
                'CRISPR-CAS9_NGG',
                'Nickase_NGG',
                'Nickase_NRG',
            ),
        )

    # E-CRISP: checkbox only, no radio group is created for it.
    ECRISP = st.sidebar.checkbox('E-CRISP', key=3)

    # GuideScan2
    GuideScan2 = st.sidebar.checkbox('GuideScan2', key=4)
    if GuideScan2:
        select_method_GuideScan2 = 'GuideScan2'
        method_GuideScan2 = st.sidebar.radio(
            _radio_prompt,
            (
                '30bp_cpf1',
                '30bp_NGG',
                '40bp_cpf1',
                '40bp_NGG',
                '50bp_cpf1',
                '50bp_NGG',
            ),
        )

    # PE-Designer
    PE_DESIGNER1 = st.sidebar.checkbox('PE-Designer', key=5)
    if PE_DESIGNER1:
        select_method_PE_DESIGNER = 'PE-Designer'
        method_PE_DESIGNER = st.sidebar.radio(_radio_prompt, ('NGG', 'NRG'))

    # PnB Designer
    PnBDesigner = st.sidebar.checkbox('PnB Designer', key=6)
    if PnBDesigner:
        select_method_PnBDesigner = 'PnB Designer'
        method_PnBDesigner = st.sidebar.radio(
            _radio_prompt,
            (
                'Base_editing_guides',
                'Nicking_guides_PE3_PE3b',
                'pegRNA_oligos',
            ),
        )

    # SNP_CRISPR
    SNPCRISPR = st.sidebar.checkbox('SNP_CRISPR', key=7)
    if SNPCRISPR:
        select_method_SNPCRISPR = 'SNP_CRISPR'
        method_SNPCRISPR = st.sidebar.radio(_radio_prompt, ('NAG', 'NGG'))

    # ------------------------------------------------------------------
    # Dispatch: call the handler of every ticked tool, in the same order
    # as the checkboxes above. The checkbox flag is tested first so that
    # `and` short-circuits before touching a `select_method_*` name that
    # was never bound (box not ticked).
    # ------------------------------------------------------------------
    if BE_DICT and select_method_BEDICT == "BE-DICT":
        BEDICT(method_bedict, select_method_BEDICT)

    if ChopChop and select_method_ChopChop == "ChopChop":
        Chopchop(method_chopchop, select_method_ChopChop)

    # E-CRISP takes only the tool label (it has no option radio group).
    if ECRISP:
        ecrisp('E-CRISP')

    if GuideScan2 and select_method_GuideScan2 == "GuideScan2":
        Guidescan2(method_GuideScan2, select_method_GuideScan2)

    if PE_DESIGNER1 and select_method_PE_DESIGNER == "PE-Designer":
        PE_DESIGNER(method_PE_DESIGNER, select_method_PE_DESIGNER)

    if PnBDesigner and select_method_PnBDesigner == 'PnB Designer':
        Pnbdesigner(method_PnBDesigner, select_method_PnBDesigner)

    if SNPCRISPR and select_method_SNPCRISPR == 'SNP_CRISPR':
        SNPcrispr(method_SNPCRISPR, select_method_SNPCRISPR)
            
# Add the logo image to the sidebar. This statement is at module level, so it
# runs after either branch of the if/else above has finished.
# NOTE(review): recent Streamlit releases deprecate `use_column_width` in favour
# of `use_container_width` — confirm against the Streamlit version in use.
st.sidebar.image("DataTecnica_White.png", use_column_width=True)