Spaces:
Sleeping
Sleeping
import os | |
import numpy as np | |
import pandas as pd | |
import streamlit as st | |
import scanpy as sc | |
#import mpld3 | |
import matplotlib.pyplot as plt | |
#from mpl_toolkits.axes_grid1 import make_axes_locatable | |
#import matplotlib.gridspec as gridspec | |
#from sunbird.categorical_encoding import frequency_encoding | |
import seaborn as sns | |
# ---------------------------------------------------------------------------
# Global plotting defaults.
# NOTE(review): scanpy's set_figure_params() below may override some of the
# matplotlib rcParams set here -- confirm the intended final values.
# ---------------------------------------------------------------------------
plt.rcParams['figure.autolayout'] = True
plt.rcParams['axes.linewidth'] = 0.0001
from functions import pathway_analyses
sc.settings.set_figure_params(dpi=80, facecolor='white', fontsize=12)

# ---------------------------------------------------------------------------
# Streamlit page configuration and CSS overrides.
# set_page_config() is issued before any other Streamlit command.
# ---------------------------------------------------------------------------
st.set_page_config(layout="wide")

# Enlarge the expander header text.
st.markdown(
    """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
""",
    unsafe_allow_html=True,
)

# Enlarge the page title text.
m = st.markdown(
    """
<style>
div.stTitle {
font-size:40px;
}
</style>""",
    unsafe_allow_html=True,
)

# Silence the st.pyplot "global use" deprecation warning.
# NOTE(review): this option may not exist in newer Streamlit releases -- verify
# against the pinned Streamlit version.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Directory (with trailing slash) that the annotated .h5ad file is read from.
cwd = os.getcwd() + '/'
def get_data():
    """Populate ``st.session_state`` with everything the UI reads.

    Each key is written only when it is absent, so Streamlit reruns within
    the same session reuse the stored objects:

    * ``adata_annot`` -- AnnData read from
      ``cwd + 'multiregion_brainaging_annotated.h5ad'``.
    * ``genes_list``  -- sorted unique values of ``adata_annot.var.index``.
    * ``cell_type``   -- sorted unique values of ``adata_annot.obs.new_anno``.
    * ``broad_type``  -- sorted unique values of
      ``adata_annot.obs.broad_celltype``.
    * ``go_table``    -- DataFrame with one column per GO pathway, holding
      that pathway's genes (shorter columns are NaN-padded by pandas).
    * ``path_ways``   -- the column index of ``go_table``.

    Returns nothing; all results are stored in the session state.
    """
    state = st.session_state

    # Every derived key is checked (the old guard ignored 'genes_list'), and
    # the AnnData already held in the session is reused instead of being
    # re-read from disk whenever only a derived list is missing.
    annot_keys = ('adata_annot', 'genes_list', 'cell_type', 'broad_type')
    if any(key not in state for key in annot_keys):
        if 'adata_annot' not in state:
            state['adata_annot'] = sc.read_h5ad(
                cwd + 'multiregion_brainaging_annotated.h5ad'
            )
        adata_annot = state['adata_annot']

        if 'genes_list' not in state:
            state['genes_list'] = sorted(adata_annot.var.index.unique())
        if 'cell_type' not in state:
            state['cell_type'] = sorted(adata_annot.obs.new_anno.unique())
        if 'broad_type' not in state:
            state['broad_type'] = sorted(adata_annot.obs.broad_celltype.unique())

    # GO Biological Process gene sets. Both keys are written together, so
    # rebuild when either one is missing.
    if 'go_table' not in state or 'path_ways' not in state:
        bp = pathway_analyses.read_pathways(
            'pathway_databases/GO_Biological_Process_2021.txt'
        )
        # Column 0 is used as the pathway identifier; the remaining columns
        # are presumably one gene per cell with NaN padding -- TODO confirm
        # against pathway_analyses.read_pathways.
        go_bp_paths = bp.set_index(0).fillna("")
        genes_by_path = {
            term: [gene for gene in row.values() if gene != ""]
            for term, row in go_bp_paths.to_dict(orient='index').items()
        }
        # orient='index' + transpose yields one column per pathway.
        go_table = pd.DataFrame.from_dict(genes_by_path, orient='index').transpose()
        state['path_ways'] = go_table.columns
        state['go_table'] = go_table
# Page title, followed by a (session-cached) data load before any widget
# reads from st.session_state.
st.title('Brain Age Browser')
get_data()

# The three top-level tabs of the app.
tab1, tab2, readme = st.tabs(
    [
        "Gene Expression by CellType",
        "Age associations for multiple genes",
        "README",
    ]
)

# NOTE(review): `data` is not referenced anywhere in the visible code.
data = np.random.randn(10, 1)
with tab1:
    # Gene / cell-type picker. The plots below are only drawn on the rerun
    # triggered by the form's "Go" button.
    with st.form(key='columns_in_form'):
        c1, c2 = st.columns(2)
        with c1:
            selected_gene = st.selectbox(
                'Please select a gene',
                st.session_state['genes_list'],
            )
        with c2:
            selected_celltype = st.selectbox(
                'Please select CellType',
                st.session_state['cell_type'],
            )
        Updated = st.form_submit_button(label='Go')

    if Updated and selected_gene is not None and selected_celltype is not None:
        adata = st.session_state['adata_annot']

        # --- Row 1: annotation UMAP next to the expression UMAP ----------
        col1, col2 = st.columns([1, 1])
        with col1:
            fig11, axx11 = plt.subplots(figsize=(5, 5))
            sc.pl.umap(adata, color='new_anno', title='', legend_loc='on data',
                       legend_fontsize='8', frameon=False, show=False, ax=axx11)
            st.pyplot(fig11)
            # Close after rendering so figures do not pile up across reruns.
            plt.close(fig11)
        with col2:
            fig12, axx12 = plt.subplots(figsize=(5, 5))
            sc.pl.umap(adata, color=selected_gene, title='', legend_loc='best',
                       frameon=False, show=False, legend_fontsize='xx-small',
                       ax=axx12)
            axx12.set_title(selected_gene, fontsize=12)
            st.pyplot(fig12)
            plt.close(fig12)

        # --- Row 2: 2x2 grid, rows = young/old, cols = all/selected type --
        # NOTE(review): source indentation was lost; this grid is assumed to
        # sit below both columns rather than inside `col2` -- confirm.
        age_group = adata.obs['Age_group']
        adata_young = adata[age_group == 'young']
        adata_old = adata[age_group == 'old']
        adata_young_ct = adata_young[adata_young.obs['new_anno'] == selected_celltype]
        adata_old_ct = adata_old[adata_old.obs['new_anno'] == selected_celltype]

        dot_size = .05
        font_sz = 4
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(3, 3))

        # (axes, data subset, colorbar location); only the bottom row carries
        # a colorbar.
        panels = (
            (ax1, adata_young, None),
            (ax2, adata_young_ct, None),
            (ax3, adata_old, "bottom"),
            (ax4, adata_old_ct, "bottom"),
        )
        for ax, subset, cbar_loc in panels:
            sc.pl.umap(subset, color=selected_gene, title="",
                       legend_loc='right margin', color_map='viridis',
                       frameon=True, show=False, size=dot_size,
                       legend_fontsize='xx-small', colorbar_loc=cbar_loc, ax=ax)
            ax.get_xaxis().set_visible(False)

        # Column headers on the top row, age labels on the left column.
        ax1.set_title('All', fontsize=font_sz)
        ax1.set_ylabel('Young', fontsize=font_sz)
        ax2.set_title(selected_celltype, fontsize=font_sz)
        ax3.set_ylabel('Old', fontsize=font_sz)
        for ax in (ax2, ax4):
            ax.get_yaxis().set_visible(False)

        # NOTE(review): the coefficient and p-value below are hard-coded and
        # shown unchanged for every gene -- TODO wire to the real results.
        fig.suptitle(
            selected_gene + "\ncoefficient estimate: 0.24 | BH-FDR p=7.91x$10^{-3}$",
            fontsize=font_sz,
        )
        st.pyplot(fig)
        plt.close(fig)
with tab2:
    # Either a hand-picked gene list or all genes of one GO term; the radio
    # button decides which of the two inputs is used.
    with st.form(key='multiselect_form'):
        c1, c2, c3 = st.columns([4, 4, 2])
        with c1:
            multi_genes = st.multiselect(
                'Select Genes List',
                st.session_state['genes_list'],
            )
        with c2:
            go_term = st.selectbox(
                'Select GO Term',
                st.session_state['path_ways'],
            )
        with c3:
            Choice = st.radio(
                "",
                ('Gene Set', 'GO Term'),
            )
        Updated_tab2 = st.form_submit_button(label='Show Results')

    if Updated_tab2:
        if Choice == 'Gene Set':
            requested = list(multi_genes)
        else:
            # Columns of go_table are NaN-padded, hence dropna().
            requested = st.session_state['go_table'].loc[:, go_term].dropna().tolist()

        # Keep only genes the dataset actually contains, so names that exist
        # only in the GO table are never handed to scanpy.
        known_genes = set(st.session_state['genes_list'])
        multi_genes = sorted(gene for gene in requested if gene in known_genes)
        n_dropped = len(requested) - len(multi_genes)

        if not multi_genes:
            # Nothing to plot: empty selection, or no overlap with the data.
            st.warning('No genes to display: select at least one gene that is present in the dataset.')
        else:
            if n_dropped:
                st.info(f'{n_dropped} gene(s) not present in the dataset were skipped.')

            adata = st.session_state['adata_annot']

            # scanpy builds its own figure when no `ax` is given, so no figure
            # is pre-created here; the current figure is fetched afterwards.
            sc.pl.dotplot(adata, multi_genes, 'new_anno',
                          size_title='Fraction of\n Expressing Cells',
                          colorbar_title='Mean\nExpression',
                          cmap='BuPu', swap_axes=True, show=False, vmax=5)
            dot_fig = plt.gcf()
            st.pyplot(dot_fig)
            # Close after rendering so figures do not pile up across reruns.
            plt.close(dot_fig)

            # show=False keeps scanpy from calling plt.show() itself.
            sc.pl.heatmap(adata, multi_genes, groupby='new_anno',
                          vmin=-1, vmax=1, cmap='BuPu', dendrogram=True,
                          swap_axes=True, show_gene_labels=True,
                          var_group_rotation=45, show=False)
            plt.xticks(rotation=45)
            heat_fig = plt.gcf()
            st.pyplot(heat_fig)
            plt.close(heat_fig)
with readme:
    # NOTE(review): the help text below describes sgRNA/CRISPR guide lookups,
    # which does not match the gene-expression tabs in this app -- it looks
    # carried over from another project; confirm and rewrite.
    usage_steps = (
        'Please select **Results Menue** checkbox from the sidebar',
        'Select a Gene from the dropdown list',
        'A table showing all reference gudies from three LISTS will appear in the main panel',
        'To see results for each of the selected reference guide from ListA, ListB and ListC, Please select respective checkbox',
        'Results are shown as two tables, **MATCHED** and **MUTATED** guides tables and **NOT FOUND** table if guides are not found in GRCh38 and LR reference fasta files',
        '**MATCHED** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**',
        '**MUTATED** guides table shows the genomic postion in GRCh38 and LR Fasta file along other fields. **If a guide is found in GRCh38 but not in LR fasta, then corresponding columns will be NA**',
    )
    expander = st.expander("How to use this app")
    for step in usage_steps:
        expander.markdown(step)

    expander1 = st.expander('Introduction')
    expander1.markdown(
        """ This app helps navigate all probable genomic **miss-matched/Mutations (upto 2 bp)** for a given sgRNA (from 3 lists of CRISPRi dual sgRNA libraries) in GRCh38 reference fasta and a Reference fasta generated from BAM generated against KOLF2.1J longread data.
"""
    )
    intro_lines = (
        'Merged bam file was converted to fasta file using following steps:',
        '- samtools mpileup to generate bcf file',
        '- bcftools to generate vcf file',
        '- bcftools consensus to generate fasta file',
        'A GPU based [Cas-OFFinder](http://www.rgenome.net/cas-offinder/) tool was used to find off-target sequences (upto 2 miss-matched) for each geiven reference guide against GRCh38 and LR fasta references.',
    )
    for line in intro_lines:
        expander1.markdown(line)

# Enlarge the tab labels.
css = '''
<style>
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
font-size:1.5rem;
}
</style>
'''
st.markdown(css, unsafe_allow_html=True)