# base_editors / app.py
# syedislamuddin's picture
# first commit
# a642e58
# raw
# history blame contribute delete
# No virus
# 34.2 kB
#from turtle import shape
import streamlit as st
#from st_keyup import st_keyup
import pandas as pd
import numpy as np
from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
import time
import os
from PIL import Image
#if 'select_method' not in st.session_state:
# st.session_state['select_method'] = 'temp'
#if 'method' not in st.session_state:
# st.session_state['method'] = 'temp'
def transform(df,str):
    """Ask the user which columns to keep and return that column subset.

    A Streamlit multiselect labelled with ``str`` is rendered; every column
    of ``df`` is offered as an option and pre-selected by default.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are offered for selection.
    str : str
        Label shown above the multiselect widget. (The name shadows the
        builtin; it is kept because it is part of the function's signature.)

    Returns
    -------
    pandas.DataFrame
        ``df`` restricted to the columns currently selected in the widget.
    """
    offered = df.columns.tolist()
    preselected = df.columns.tolist()
    chosen = st.multiselect(str, offered, preselected)
    return df[chosen]
@st.cache
def convert_df1(df):
    """Serialise ``df`` to UTF-8 encoded CSV bytes without the index column.

    The function is wrapped in ``st.cache``.
    """
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
def convert_df(df):
    """Serialise ``df`` (index column included) to UTF-8 encoded CSV bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
def display_res(method,sep,rsid):
    """Show the results of one tool option for the currently selected variant.

    Reads ``<cwd><select_method>/<select_method>_<method>.csv``, keeps the rows
    whose ``rsid`` column contains the selected SNP identifier, prefixes that
    column with the gene name (or ``'NaN'`` when the variant has no gene part)
    and renders the rows in an AgGrid table. Download buttons are offered for
    the whole table and for the rows ticked in the grid.

    The function depends on the module-level names ``select_method``, ``cwd``,
    ``variant_spl`` and ``table_edit`` being defined before it is called.

    Parameters
    ----------
    method : str
        Option name; used in the page header, the input file name and the
        download file name.
    sep : str
        Field separator handed to ``pandas.read_csv``.
    rsid : str
        Name of the column that carries the variant identifier.

    Returns
    -------
    None
    """
    st.header(select_method+' with: '+method+' option')
    fnm = cwd+select_method+'/'+select_method+'_'+method+'.csv'
    data = pd.read_csv(fnm, sep=sep)

    # variant_spl is either [gene, snp] or just [snp].
    has_gene = len(variant_spl) > 1
    snp_id = variant_spl[1] if has_gene else variant_spl[0]
    gene_prefix = variant_spl[0] if has_gene else 'NaN'

    # regex=False: the identifier is matched literally, not as a pattern.
    # na=False: rows with a missing identifier count as "no match" instead of
    # producing a NaN mask that pandas refuses to index with.
    matches = data[rsid].str.contains(snp_id, regex=False, na=False)
    # Work on an explicit copy so the column assignment below does not write
    # into a slice of `data`.
    data_snp = data[matches].copy()
    data_snp[rsid] = gene_prefix+':'+data_snp[rsid]
    data_snp.reset_index(drop=True, inplace=True)

    # NOTE(review): the source this was restored from had lost its indentation,
    # so how much of the code below was originally guarded by the
    # "has rows" check is not recoverable. Here the whole table section is
    # skipped for an empty result and a short notice is shown instead.
    if data_snp.shape[0] == 0:
        st.info('No results found for the selected variant with this option.')
        return

    df = transform(data_snp,'Please Select columns to save whole table')
    csv = convert_df(df)
    st.download_button(
        label="Download Table as CSV file",
        data=csv,
        file_name=method+'_'+variant_spl[0]+'.csv',
        mime='text/csv',
    )

    gene_shown = variant_spl[0] if has_gene else 'NAN'
    st.markdown(f"**Results for SNP: {snp_id} on GENE: {gene_shown}**")

    st.markdown(table_edit,unsafe_allow_html=True)
    gb = GridOptionsBuilder.from_dataframe(data_snp)
    gb.configure_pagination(enabled=False)
    gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
    gb.configure_selection(selection_mode="multiple", use_checkbox=True)
    gb.configure_side_bar()
    gridOptions = gb.build()
    grid_response = AgGrid(
        data_snp,
        height=200,
        gridOptions=gridOptions,
        enable_enterprise_modules=True,
        update_mode=GridUpdateMode.MODEL_CHANGED,
        data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
        fit_columns_on_grid_load=False,
        header_checkbox_selection_filtered_only=True,
        use_checkbox=True,
        width='100%'
    )

    selected = grid_response['selected_rows']
    # Explicit emptiness test: works for a list of row dicts as well as for a
    # DataFrame (whose truth value is ambiguous) or None.
    # NOTE(review): which of these st_aggrid returns depends on its version.
    if selected is not None and len(selected) > 0:
        st.write('Selected rows')
        dfs = pd.DataFrame(selected)
        # The first column is dropped before display/export.
        # NOTE(review): presumably grid bookkeeping added by st_aggrid - confirm.
        picked = dfs[dfs.columns[1:]]
        st.dataframe(picked)
        dfs1 = transform(picked,'Please select columns to save selected Table')
        csv = convert_df1(dfs1)
        st.download_button(
            label="Download data as CSV",
            data=csv,
            file_name=method+'_'+variant_spl[0]+'.csv',
            mime='text/csv',
        )
# Use the wide page layout.
st.set_page_config(layout="wide")

# Folder under the working directory from which the result files are read.
cwd = os.getcwd() + '/data/'

# Variants offered in the sidebar: unique 'GENE:SNP' entries, shortest first.
snps = pd.read_csv("SNPS.csv")
variants = snps['GENE:SNP'].unique()
variants_s = sorted(variants, key=len)

# HTML snippets reused by the tool pages.
caution = '<p style="font-family:sans-serif; color:Red; font-size: 18px;">Please note that not (necessarily) all variants are targetted.</p>'
tips = '<p style="font-family:sans-serif; color:Green; font-size: 18px;">Important Tool Tips:</p>'
table_edit = '<p style="font-family:sans-serif; color:Green; font-size: 16px;">About Table: Please note that table can be <b>sorted based on by clicking on any column</b> and <b>Multiple rows can be selected</b> (by clicking check box in first column) to save only those rows.</p>'

st.title('Single Base Editiors')
st.sidebar.image("logo-card-white.png", use_column_width=True)

# Sidebar switch between the README page and the interactive selection menu.
page_modes = ('ReadME', 'Selection Menu')
Calc = st.sidebar.radio("", page_modes)
# Main page body: either the README text or the interactive tool pages,
# depending on the sidebar radio stored in `Calc`.
# Bare string literals below are rendered by Streamlit's "magic" feature; their
# content is kept flush-left so the text handed to Streamlit is unchanged.
if Calc == 'ReadME':
    st.header('How to use this app')
    st.markdown('Please note that all tools were run using Human Genome **(hg38)**. Each tool require **specific input format** (described for each tool selected from the sidebar when **Selection Menue is enabled**) and **output results** in different formats **(with different columns based on method selected as described under each tool)**. Some of these tools also allow selection of various **endonucleases and related options**, their **reulsts are provided as radio controls** in the sidebar of this app under each tool.')
    st.markdown('**Requirements:** 1) Python3.4 or higher and 2) streamlit 1.13')
    st.markdown('To start this app, **unzip** the base_editor_app.zip in a folder of your choice')
    st.markdown('Open shell terminal and **cd to base_editor_app folder**')
    st.markdown('Type: **streamlit run baserditorsV3.py**, It will launch baseeditor app in the default browser')
    st.markdown('**By default** README radio button is enabled to describe general information about the App and How to use it.')
    st.markdown("- Please enable **Selection Menu** radio control in the sidebar **to enable variant, tool and endonuclease options**")
    st.markdown("- Select Desired Variant from the dropdown list")
    st.markdown("- Select a Tool from the dropdown list")
    st.markdown("- Select one of the options **(if available)**")
    st.header('Introduction:')
    st.markdown('This app **reviewes** popular single base quality estimators for a **list [1](https://drive.google.com/file/d/1Sxb-Cc-epbs6vujQaX9wa5acqus0RW3q/view?usp=sharing) of rsIDs** per disease of interest based on CARD’s cross-NDD efforts. We filtered our candidate list of **base edit predictors** for those that are at least **semi-automated and reproducible** (no copy and pasting IDs or sequences one at a time.')
    st.markdown('Two categories of DNA base editors **(BEs)** are, a) cytosine base editors **(CBEs: C/G -> T/A converters)** and b) adenine base editors **(ABEs: A/T -> G/C converters)**, as shown in Figure below. While base editors can only introduce 4 edits, **Prime Editors** on the other hand can do all 12 edits using usual Cas9 (and its variants) and a gRNA called prime editing guide RNA (**pegRNA**). We also tested a **prime editor** and an **RNA editor for gene knockdown** for these targets')
    image = Image.open('CBE_ABE.webp')
    st.image(image, caption='Cytosine and Adenine base editors. Figure from: https://www.nature.com/articles/s41573-020-0084-6')
    st.header('How do base editors work')
    st.markdown("**Base editing requires three elements:**")
    st.markdown("- A Cas nickase (Cas9 with mutation in RuvC nuclease domain, which enables it to nick but not cleave DNA) or Cas fused to a deaminase that makes the edit.")
    st.markdown("- A single guide RNA (sgRNA which is composed of target-specific CRISPR RNA (crRNA) and an auxiliary trans-activating crRNA (trcrRNA) joined by linker loop) targeting Cas9 to a specific DNA locus")
    st.markdown("- A target base for editing within the editing window specified by the Cas9 protein")
    st.markdown('The Cas9 protein has six domains, REC I (responsible for binding guide RNA), REC II, Bridge Helix, PAM Interacting (**confers PAM specificity and is responsible for initiating binding to target DNA**), HNH and RuvC (**each cut single-stranded DNA after 3rd base upstream of PAM**). Cas9 and its variants are highly specific to various PAM sequences and have two endonuclease domains: the n-terminal RuvC-like nuclease domain and the HNH-like nuclease domain near the center of the protein')
    st.markdown('A whole range of CBEs and ABEs have been developed. Various CEBs ranging from **simple** deactivated Cas9 (dCas9)+cytidine deaminsae+uracil DNA glycosylase inhibitor (UGI) to improved single mutated Cas9 (nCas9)+cytidine deaminsae+uracil DNA glycosylase inhibitor (UGI) called BE3 systems and its variants such as Target-AID editors were developed. 4th generation BEs (called BE4, such as BE4max etc which focus on improving editors delivery to the nucleus) further minimize undesired base conversions that can happen with BE33.')
    st.markdown('Similar to CBEs, **Adenine base editors (ABEs)** such as ABEmax, ABE4max, ABE8e and ABE8s were also developed.')
    st.header('Base editor tools reviewed')
    st.markdown('We have reviewed a total of 6 tools in the public domain which are **at least semi-automated and reproducible** (no copy and pasting IDs or sequences one at a time). These tools offer a wide range of options ranging from **HDR** based edits to improved **single base editors** to precise base editing such as **Prime editing**. Furthermore, many of these tools offer variety of PAM sequences expanding the number of available target sites for base editing')
    """
- [BE-DICT](http://130.60.24.130/page-set?actionID=5f8c494b8c854d0029ffa9d3)
- An attention based deep learning algorithm for based editing outcomes prediction [Paper](https://www.nature.com/articles/s41467-021-25375-z).
- Options: ABE8e, ABEmax, BE4max, Target-AID.
- [ChopChop](https://chopchop.cbu.uib.no)
- This tool offers various Endonucleaes (Cas9, nCas9, Cpf1 (also known as Cas12a and **only contains crRNA**), CasX (generates staggered double-stranded break) and **Cas13 (also known as C2c2)) RNA editor**) and PAM options. Results for following options are reported in this app:
- Cas13a, CasX_TTCN, Cpf1_TTN, NGG (Cas9), Nickase_NGG, Nickase_NRG.
- [E-CRISP](http://www.e-crisp.org/E-CRISP/)
- This tool offers relaxed, medium and strict options for PAM sequence.
- [GuideScan2](https://guidescan.com)
- This tool offers Cas9 and Cpf1 endonucleases with various options to filter out results. Results based on 30, 40 and 50bp (SNP location = (n/2)bp) input sequence range (for Cas9 and Cpf1) are reported in this app:
- 30bp_cpf1, 30bp_NGG, 40bp_cpf1, 40bp_NGG, 50bp_cpf1, 50bp_NGG
- [PnB Designer](https://fgcz-shiny.uzh.ch/PnBDesigner/)
- This tool allows base editing as well as **prime editing**. Results reported in this app are based on:
- Base_editing_guides, Nicking_guides, pegRNA_oligos
- [SNP_CRISPR](https://www.flyrnai.org/tools/snp_crispr/web/)
- This tool offers guides for NGG and NAG PAM sequences and are reporoted in this app:
- NGG, NAG
"""
else:
    select_variant = st.sidebar.selectbox(
        "Please select variant",
        variants_s
    )
    # Whitespace-split the chosen entry: [gene, snp] or just [snp].
    variant_spl = select_variant.split()
    select_method = st.sidebar.selectbox(
        "Please select a Tool",
        ("BE-DICT", "ChopChop","E-CRISP","GuideScan2","PnB Designer","SNP_CRISPR")
    )

    # Radio options per tool. E-CRISP is absent on purpose: it has a single
    # result set and therefore no option radio (and no `method`).
    method_options = {
        "ChopChop": ('Cas13a', 'CasX_TTCN','Cpf1_TTN','CRISPR-CAS9_NGG', 'Nickase_NGG','Nickase_NRG'),
        "BE-DICT": ('bystander_ABE8e_mean', 'bystander_ABEmax_mean_5','bystander_BE4max_mean','bystander_Target-AID_mean'),
        "GuideScan2": ('30bp_cpf1', '30bp_NGG','40bp_cpf1', '40bp_NGG','50bp_cpf1', '50bp_NGG'),
        "PnB Designer": ('Base_editing_guides', 'Nicking_guides_PE3_PE3b','pegRNA_oligos'),
        "SNP_CRISPR": ('NAG', 'NGG'),
    }
    if select_method in method_options:
        method = st.sidebar.radio(
            "Please select an option",
            method_options[select_method])

    #BE-DICT
    if select_method == "BE-DICT":
        st.markdown("**Summary**")
        st.markdown("BE-DICT predicts base editiong outcomes for **4 commonly used base editors (BEs)**. It uses an attention-based deep learning algorithm (based on transformer-encoder architecture) trained on high-throughput target library (of 28,394 target sequences) screens to predict single base editing (Both **Adenine** {A.T -> G.C} and **Cytosine** {C.G -> T.A} **Base Editors**) outcomes.")
        """
- Adenine Base Editors (ABEs)
- Based on Adenine deaminase ecTad7.10 **(ABEmax)** and ecTadA-8e **(ABE8e)**
- Cytosine Base Editors (CBEs)
- Based on Cytosine deaminase rAPOPEC1 **(CBE4max)** and **Target-AID**
"""
        st.markdown("Most base editors convert target bases in a ~5- nucleotide region within the protospacer target sequence and undesired **bystander** editing of additional C or A bases in the editing window are common.")
        st.markdown("**All results shown here are based on bystander models for each ABE and CBE.**")
        st.write("[BE-DICT Web App](http://130.60.24.130/page-set?actionID=5f8c494b8c854d0029ffa9d3)")
        st.write("[BE-DICT Paper](https://www.nature.com/articles/s41467-021-25375-z)")
        st.markdown(caution,unsafe_allow_html=True)
        st.markdown("- Input: 2-column csv: col1=Inp_seq, col2=seq_id, where **Inp_seq is a 20 bp** target sequence and seq_id is an identification.")
        st.markdown("- Output: 4 column csv file")
        st.markdown("- **Columns of interest**: Output_seq and Pred_score columns **(Higher is better)**.")
        st.markdown("**Batch mode** can be run from [here](http://130.60.24.130/page?actionID=607552549609a200293b663f)")
        st.markdown("**Please note that: Only one of all possible alleles is used to generate this output.**")
        st.markdown(tips,unsafe_allow_html=True)
        st.markdown('This tool uses an **attention based deep learning framework** for base predictions and employ four different algorithms/models for prediction: ABEMax, BE4max, ABE8e, Target-AID.')
        st.markdown('**Please note that this tool only targets NGG PAM**')
        display_res(method,',','rs_id')

    #ChopChop
    if select_method == "ChopChop":
        st.markdown("**Summary**")
        st.markdown("ChopChop is a versatile tool that identifies CRISPR–Cas single guide RNA (sgRNA) targets for DNA and RNA including targeted enrichment of loci for long-read sequencing for over **200** genomes and **3** transcriptomes. It offers a wide range of selection of **CRISPR effectors** (Cas9, CasX or Cas13), **Species**, and **Purpose** (knockout, knockdown, activation, repression, enrichment) alongside a variety of **Options** including selection of specific region, PAM sequences, various efficiency measures, primers and many more.")
        st.markdown("**Please note that not all options results in an efficienc score (0 is reported in efficiency column).**")
        st.write("[ChopChop Web App](https://chopchop.cbu.uib.no)")
        st.write("[ChopChop Paper](https://academic.oup.com/nar/article/47/W1/W171/5491735)")
        st.markdown(caution,unsafe_allow_html=True)
        st.markdown("- Input: A text file containing chr:start-end per line for each snp. Ex: chr1:152220450-152220451")
        st.markdown("- Output: A tab separated text file")
        st.markdown("- **Columns of interest**: Target sequence and Efficiency (**higher the better**). Please note that not all options have Efficiency defined.[Ref](https://chopchop.cbu.uib.no/instructions)")
        st.markdown("**Instructions to run Batch mode** can be found [here](https://bitbucket.org/valenlab/chopchop/src/master/)")
        st.markdown(tips,unsafe_allow_html=True)
        """
- This tool offers sgRNA design for:
- **DNA using:**
- CRISPR/Cas9 system for knockout, knockin, activation, repression and nanopore enrichment.
- CRISPR/Cpf1 or CasX system for knockout, activation, repression and nanopore enrichment.
- CRISPR/Cas9 nickase system for knockout and knockin.
- TALEN system for knockout.
- **RNA using:**
- CRISPR/Cas13 (c2c2) for knockdown.
**This tool also offers a variety of PAM sequences and other filtering options.**
"""
        st.markdown("**Please note that the tool was run for CRISPR/cas9 for NGG (knock-out), CRISPR/cas9-nickase (knock-out) for NGG and NRG (R=A or G), CRISPR/cpf1 for TTN, CRISPR/CasX for TTCN PAM and CRISPR/cas13(c2c2).**")
        display_res(method,'\t','snp_id')

    #E-CRISP
    if select_method == "E-CRISP":
        st.markdown("**Summary**")
        st.markdown("E-CRISP is used to design gRNA sequences **(for 12 organisms)**. E-CRISP can also be used to reevaluate CRISPR constructs for on- or off-target sites and targeted genomic loci. It identifies target sequences complementary to the gRNA ending in a 3ʹ protospacer-adjacent motif (PAM), N(G or A)G and uses a fast indexing approach to find binding sites and a binary interval tree for rapid annotation of putative gRNA target sites.")
        st.markdown("**Off-target** effects and target-site homology are evaluated using Bowtie2 aligner. Designs are **shown** in the output if the number of **off-targets does not exceed a user-specified threshold**. **More than one** design targeting a desired locus are **ranked** according to on-target specificity and number of off-targets.")
        st.write("[E-CRISP Web App](http://www.e-crisp.org/E-CRISP/)")
        st.write("[E-CRISP Paper](https://www.nature.com/articles/nbt.3026)")
        st.markdown(caution,unsafe_allow_html=True)
        """
- Input: Multiple lines provided in the Input fasta sequence edit box in the webapp **[here](http://www.e-crisp.org/E-CRISP/index.html)** in the following format
- Line1: rs12726330
- Line2: CGGGACATGGAAGAGGTCTGGACCAGGGTACTGGGAAGGCGCTCGGAGGA
- Line3: rs76763715
- Line4: CCAGCCGACCACATGGTACAGGAGGTTCTAGGGTAAGGACAAAGGCAAAG
- and so on
"""
        st.markdown("- Output: A tab separated .tab file")
        st.markdown("- **Columns of interest**: Efficiency Score (E Score, **Higher the better**) [Ref](https://www.nature.com/articles/nbt.3026) and Specificity Score (S Score, **Higher the better** (max = 100))")
        st.markdown(tips,unsafe_allow_html=True)
        """
- This tool offers single or paired sgRNA and:
- **Options for PAM:**
- **Relaxed**
- **Medium:**
- **Strict**
- **Options for Design:**
- knockdown.
- knockin.
- N/C terminal tagging.
- CRISPRi.
- CRISPRa.
- **Other filtering options.**
"""
        st.markdown("**Please note that the result reported here are for PAM=NGG**")
        st.header(select_method)
        fnm = cwd+select_method+'/'+select_method+'_NGG'+'.csv'
        data = pd.read_csv(fnm, sep=',')

        # variant_spl is either [gene, snp] or just [snp].
        has_gene = len(variant_spl) > 1
        snp_id = variant_spl[1] if has_gene else variant_spl[0]
        gene_prefix = variant_spl[0] if has_gene else 'NaN'

        # The identifier column of this table is 'Name'. The gene prefix is
        # written to that same column; the previous code addressed it through
        # `rsid`, a name that only exists inside display_res, and so failed
        # with NameError for every variant that has a gene part.
        # regex=False / na=False: literal match, missing ids never match.
        matches = data['Name'].str.contains(snp_id, regex=False, na=False)
        data_snp = data[matches].copy()  # explicit copy before column assignment
        data_snp['Name'] = gene_prefix+':'+data_snp['Name']
        data_snp.reset_index(drop=True, inplace=True)

        # NOTE(review): the flattened source does not show how much of the
        # table section was originally guarded by the "has rows" check; here
        # the whole section is skipped for an empty result.
        if data_snp.shape[0] > 0:
            df = transform(data_snp,'Please Select columns to save whole table')
            csv = convert_df(df)
            st.download_button(
                label="Download Table as CSV file",
                data=csv,
                file_name=select_method+'_'+variant_spl[0]+'.csv',
                mime='text/csv',
            )

            # SNP is the filtered identifier, GENE the first component
            # (the two were swapped in the previous two-component label).
            gene_shown = variant_spl[0] if has_gene else 'NAN'
            st.markdown(f"**Results for SNP: {snp_id} on GENE: {gene_shown}**")

            st.markdown(table_edit,unsafe_allow_html=True)
            gb = GridOptionsBuilder.from_dataframe(data_snp)
            gb.configure_pagination(enabled=False)
            gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
            gb.configure_selection(selection_mode="multiple", use_checkbox=True)
            gb.configure_side_bar()
            gridOptions = gb.build()
            grid_response = AgGrid(
                data_snp,
                height=200,
                gridOptions=gridOptions,
                enable_enterprise_modules=True,
                update_mode=GridUpdateMode.MODEL_CHANGED,
                data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
                fit_columns_on_grid_load=False,
                header_checkbox_selection_filtered_only=True,
                use_checkbox=True,
                width='100%'
            )

            selected = grid_response['selected_rows']
            # Explicit emptiness test: valid for a list of row dicts, a
            # DataFrame (ambiguous truth value) or None.
            # NOTE(review): which of these st_aggrid returns depends on its version.
            if selected is not None and len(selected) > 0:
                st.write('Selected rows')
                dfs = pd.DataFrame(selected)
                # The first column is dropped before display/export.
                # NOTE(review): presumably grid bookkeeping added by st_aggrid - confirm.
                picked = dfs[dfs.columns[1:]]
                st.dataframe(picked)
                dfs1 = transform(picked,'Please select columns to save selected Table')
                csv = convert_df1(dfs1)
                st.download_button(
                    label="Download data as CSV",
                    data=csv,
                    file_name=select_method+'_'+variant_spl[0]+'.csv',
                    mime='text/csv',
                )
        else:
            st.info('No results found for the selected variant.')

    #GuideScan2
    if select_method == "GuideScan2":
        st.markdown("**Summary**")
        st.markdown("GuideScan2 employes Cas9 (tracrRNA and crRNA) and Cas12a, previously known as cpf1, (requires only crRNA) for sgRNA design for 8 organisms. It is a memory efficient and improved version of GuideScan that enables construction of high-specificity gRNA databases with reduced off-target effects.")
        st.markdown("CRISPR-Cas9 targets a 20-nucleotide spacer sequence at the end of the gRNA that is complementary to a DNA protospacer sequence followed immediately at the 3’ end by a PAM of the form NGG (more efficient targeting) or NAG (less efficient); here N stands for a ‘wildcard’, i.e. can match any nucleotide. Other natural and engineered CRISPR-Cas systems can **vary in PAM sequence, PAM position with respect to the protospacer sequence, and requirements on the level of similarity between gRNA and the target.**")
        st.markdown("Given a genomic region, the task of gRNA design is to find gRNAs that can target anywhere in that region. Many potential gRNAs can target at multiple locations in the genome with varying efficiency. Typically a gRNA is designed to target a particular location with **perfect complementarity** with all other targets of this gRNA are being **off-targets**. **Goal** of gRNA design is typically to **maximize gRNA efficiency at the primary target site while minimizing off-targeting.**")
        st.markdown("Variants and extensions of the gRNA design task include: paired gRNA design to select two gRNAs targeting flanking sites of a genomic region of interest; saturation experiment design to exhaustively select all gRNAs expected to target a selected region of interest; and library design to select a small number of the most effective gRNAs for each of hundreds or thousands of regions of interest.")
        st.write("[GuideScan2 Web App](https://guidescan.com)")
        st.write("[GuideScan2 Paper](https://www.biorxiv.org/content/10.1101/2022.05.02.490368v1)")
        st.markdown(caution,unsafe_allow_html=True)
        """
- Input: Line delimited Genomic intervals (or DNA sequence) as a text file in the webapp **[here](https://guidescan.com/)** in the following format (of genomic range 30bp, 40bp etc):
- Line1: chr10:11676698-11676728
- Line2: chr1:152220435-152220465
- and so on
"""
        st.markdown("- Output: A csv file containing all gRNAs within the genomic regions provided in the input file")
        st.markdown("- **Columns of interest**: Cutting efficiency (**Higher the better**), Specificity (**Higher the better**) [Ref](https://www.biorxiv.org/content/10.1101/2022.05.02.490368v1.full.pdf)")
        st.markdown(tips,unsafe_allow_html=True)
        """
- This tool offers sgRNA design for:
- **CRISPR/Cas9**
- **CRISPR/Cpf1**
- **Please note that this tool work best for genomic intervals >30bp.**
"""
        st.markdown("**Please note that the software was run with Cas9 (NGG PAM) and cpf1 (TTG PAM) option with all other options left as default.**")
        display_res(method,',','query')

    #PnB Designer
    if select_method == "PnB Designer":
        st.markdown("**Summary**")
        st.markdown("DNA base editors (BEs), cytosine base editors which employes cytidine-deaminase (CBEs: C/G -> T/A converters) and adenine base editors which employes Adenine-deaminase (ABEs: A/T -> G/C converters) **can only introduce 4 edits** via gRNA, Prime Editors **(PEs)**, employing Cas9 nickase fused to an engineered reverse transcriptase via a gRNA called prime editing guide RNA (pegRNA), on the other hand **can do all 12 edits.** To introduce a modification in the genome, PEs use pegRNA consisting of a 20 nt guide sequence, a primer binding site (PBS) and a reverse transcriptase template (RTT). The guide directs the Cas enzyme to a target site, the PBS hybridizes to the opposite strand to prime the reverse transcriptase, and the RTT integrates the desired genomic alteration.")
        st.markdown("Optimized PE2, called PE3 and PE3b with reduced off-targets are used")
        st.markdown("PnB Designer allows design of pegRNAs for PEs and guide RNAs for CBE and the most recent ABEs such as ABEmax and ABE8e. PnB Designer makes it easy to design targeting guide RNAs for single or multiple targets on a variant or reference genome from organisms (and non-model organisms or synthetic constructs) spanning multiple kingdoms. It has been used PnB Designer to design candidate pegRNAs to model all human mutations in ClinVar")
        st.markdown("**PnB Designer enables design of pegRNAs for all known disease causing mutations available in ClinVar**")
        st.markdown("Nicking guides for the PE3 and PE3b systems are designed and filtered to provide a suitable selection of gRNAs. For PE3, only nicking guides 40–100 nt up/downstream of the initial nick are considered. For PE3b, only PAM sequences on the complementary strand that partially overlap with the PE2 PAM or protospacer sequence are displayed.")
        st.write("[PnB Designer Web App](https://fgcz-shiny.uzh.ch/PnBDesigner/)")
        st.write("[PnB Designer Paper](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-021-04034-6)")
        st.markdown(caution,unsafe_allow_html=True)
        """
- Input: Multiple lines provided in as a csv file in the webapp **[here](https://fgcz-shiny.uzh.ch/PnBDesigner/)** in the following format:
- Prime editing:
- varinat, chromosome num, genomic location, Edit, gene orientation, OBS, RTT
- Ex: rs7412, 19, 44908822, insA, +, 13, 13
- Base editing
- varinat, chromosome num, genomic location, SNO, gene orientation, OBS, RTT
- Ex: rs7412, 19, 44908822, C>T, +
"""
        st.markdown("- Output: A csv file")
        st.markdown(tips,unsafe_allow_html=True)
        """
- This tool can be run in two modes:
- **Base editing mode:**
- Does not allow A>T or G>C, dels, or insertions.
- Only **180**/414 variants could be targeted.
- - **Columns of interest**: Protospacer, PAM and Base Editor (the system for producing the base edit). **There is no score**.
- **Prime editing mode:**
- Requires two guides: detailed in two files.
- pegRNA oligos for cloning.
- **Score: Higer is better**.
- Nicking guides: the corresponding nicking guides
"""
        st.markdown("**Please note that the tool was run in Base editing and Prime editing modes. Corresponding nicking guides are also reported here.**")
        display_res(method,',','query')

    #SNP_CRISPR
    if select_method == "SNP_CRISPR":
        st.markdown("**Summary**")
        st.markdown("SNP-CRISPR designs sgRNAs targeting specific SNPs or indels containing loci (for human, mouse, rat, fly and zebrafish genomes) by facilitating the design of sgRNAs that target specific variants and provides all possible CRISPR-Cas9 target sites in the given genomic region with required parameters, allowing users to select an optimal sgRNA. It provides efficiency scores and off-target information for sgRNAs targeting sequences with and without SNPs and/or indels of interest in the same genomic region.")
        """
**Design:**
- SNP-CRISPR validates the input reference sequences and **warn if the submitted reference sequences does not match**, which might reflect a different version of the genome assembly being used in the user input vs. SNP-CRISPR and re-constructs the template sequence, swapping the reference nucleotide with the variant nucleotide for SNPs, while inserting or deleting the corresponding fragment for indel type variants.
- Computes potential variant-targeting sgRNAs based on availability of PAM sequences in the neighboring region since the presence of a PAM sequence (NGG or NAG) is one of the few requirements for binding.
- sgRNA designs that contain four or more consecutive thymine residues, which can result in termination of RNA transcription by RNA polymerase III, are filtered out.
- Computes an efficiency score (Housden et al. 2015) and a specificity score calculated based on BLAST results against the reference genome.
- Finally, all possible sgRNAs are provided to the user along with specificity and efficiency scores, without further filtering.
- For identification of the best variant-specific sgRNAs, we provide information about both sgRNAs targeting specific variants and sgRNAs targeting the reference sequence in the same region. The efficiency score and an off-target score are provided, and the positions of relevant SNPs or indels in the sgRNA are included so that users can select the most suitable sgRNA or filter out less optimal ones.
"""
        st.write("[SNP_CRISPR Web App](https://www.flyrnai.org/tools/snp_crispr/web/)")
        st.write("[SNP_CRISPR Paper](https://academic.oup.com/g3journal/article/10/2/489/6026318)")
        st.markdown(caution,unsafe_allow_html=True)
        """
- Input: Multiple lines provided in as a (6 columns) csv file uploaded to the webapp **[here](https://www.flyrnai.org/tools/snp_crispr/web/)** in the following format:
- varinat, chromosome, position, strand, reference, variant
- Ex: rs7412, 19, 44908822, C, +, T
"""
        st.markdown("- Output: A csv file")
        st.markdown("- **Columns of interest**: Housden Efficiency Score [Ref](https://www.ncbi.nlm.nih.gov/pubmed/26350902) (Range from 1.47-12.32 **(higher is better, > 5 recommended))** and Off Target Score (Range from 0-5441.73 (lower is better, < 1 recommended))")
        st.markdown(tips,unsafe_allow_html=True)
        """
- This tool can design guides for:
- **NGG.**
- **NAG.**
- **Target multiple variants within the same guide.**
- Public variant data sets or user-identified variants.
"""
        st.markdown("**Please note that the software was run for NAG and NGG PAM sequences only.**")
        display_res(method,',','Gene')

# Second logo at the bottom of the sidebar.
# NOTE(review): placed at top level (shown in both modes); the flattened source
# does not show its original nesting - confirm.
st.sidebar.image("DataTecnica_White.png", use_column_width=True)