"""Streamlit lookup tool for CPT-1 variant effect predictions.

The app lists the genes that have a score file in one of three data
directories, lets the user pick one, and then shows the per-variant scores
as a heatmap (mutant amino acid x position) together with a CSV download.
"""
from os import listdir

import pandas as pd
import plotly.graph_objects as go
import streamlit as st

# Every per-gene score file is read as "<gene>_HUMAN.csv.gz".
SCORE_FILE_SUFFIX = '_HUMAN.csv.gz'

EVE_DIR = './CPT1_score_EVE_set/'
NO_EVE_DIR_1 = './CPT1_score_no_EVE_set_1/'
NO_EVE_DIR_2 = './CPT1_score_no_EVE_set_2/'

POSITION_COL = 'Position on protein sequence'
MUTANT_AA_COL = 'Mutant amino acid'


def _gene_names(directory):
    """Return the gene names that have a score file in *directory*.

    Only files ending in ``SCORE_FILE_SUFFIX`` are considered, so stray
    entries (hidden files, editor backups, ...) never show up as selectable
    genes that would then fail to load.  The suffix is stripped as a whole,
    so a gene name containing ``_`` is not truncated.
    """
    return [
        name[:-len(SCORE_FILE_SUFFIX)]
        for name in listdir(directory)
        if name.endswith(SCORE_FILE_SUFFIX)
    ]


def _heatmap_matrix(pred):
    """Pivot the per-variant table into a mutant-amino-acid x position matrix.

    *pred* must have a ``mutant`` column whose last character is the mutant
    amino acid; everything before it is used as the position label.
    """
    mat = pred.copy()
    mat[MUTANT_AA_COL] = mat['mutant'].str[-1]
    mat[POSITION_COL] = mat['mutant'].str[:-1]
    mat = mat.set_index('mutant')
    # An ordered categorical keeps the positions in file order; plain strings
    # would be sorted lexicographically by the pivot.
    mat[POSITION_COL] = pd.Categorical(
        mat[POSITION_COL],
        categories=mat[POSITION_COL].unique(),
        ordered=True)
    # With 'mutant' moved to the index, the first remaining column is the one
    # plotted (presumably the CPT-1 score -- confirm against the data files).
    return mat.pivot(index=MUTANT_AA_COL, columns=POSITION_COL,
                     values=mat.columns[0])


def _heatmap_figure(mat, gene):
    """Build the Plotly heatmap figure for the pivoted matrix *mat*."""
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            z=mat,
            y=mat.index,
            x=mat.columns,
            colorbar=dict(title='Variant effect'),
            colorscale=[[0, '#6FA8DC'], [0.3, '#CFE2F3'], [0.5, '#FFFFFF'],
                        [0.7, '#F4CCCC'], [1.0, '#BA1111']],
            # NOTE(review): the line-break tags were lost in the copy of the
            # file this was restored from; '<br>' is the reconstruction.
            hovertemplate='Wild-type amino acid and position: %{x}<br>'
                          'Mutant amino acid: %{y}<br>'
                          'Prediction: %{z}')
    )
    fig.update_layout(
        title_text="CPT-1 variant effect prediction for " + gene
                   + " (red: pathogenic, blue: benign)",
        xaxis=dict(
            title='Position on protein sequence',
            rangeslider=dict(visible=True)
        ),
        yaxis=dict(
            title='Mutant amino acid'
        ),
        yaxis_nticks=mat.shape[0],
        height=600
    )
    return fig


gene_names_eve = _gene_names(EVE_DIR)
gene_names_no_eve1 = _gene_names(NO_EVE_DIR_1)
gene_names_no_eve2 = _gene_names(NO_EVE_DIR_2)

# Gene -> directory lookup.  setdefault keeps the first directory seen, which
# preserves the precedence EVE set > no-EVE set 1 > no-EVE set 2.
gene_dirs = {}
for directory, names in ((EVE_DIR, gene_names_eve),
                         (NO_EVE_DIR_1, gene_names_no_eve1),
                         (NO_EVE_DIR_2, gene_names_no_eve2)):
    for name in names:
        gene_dirs.setdefault(name, directory)

st.subheader('CPT-1')
st.markdown('Cross-protein transfer learning for variant effect prediction')
st.markdown('This is a lookup tool for the variant effect predictions of CPT-1 for 18,602 human proteins, initially released with the manuscript "Cross-protein transfer learning substantially improves zero-shot prediction of disease variant effects (2022)".')

# Input query gene
gene = st.selectbox(
    'Which gene/protein are you interested in? (in UniProt gene names)',
    gene_names_eve + gene_names_no_eve1 + gene_names_no_eve2)

if st.button('Show results'):
    if gene is None:
        # selectbox returns None when there are no options to choose from.
        st.warning('No CPT-1 score files were found.')
    else:
        # Read the gene file
        pred = pd.read_csv(gene_dirs[gene] + gene + SCORE_FILE_SUFFIX,
                           compression='gzip')

        # Plot heatmap
        mat = _heatmap_matrix(pred)
        fig = _heatmap_figure(mat, gene)
        st.plotly_chart(fig, theme="streamlit", use_container_width=True,
                        height=600)

        # Generate download link
        st.download_button('Download CSV',
                           pred.set_index('mutant').to_csv().encode('utf-8'),
                           gene + '_CPT_score.csv',
                           'text/csv')

# Citation
# NOTE(review): the HTML tags in this block were lost in the copy of the file
# this was restored from; '<br>' and the '<h4>' heading are a reconstruction --
# confirm against the deployed page.
# '\\*' yields a literal backslash-asterisk (a Markdown-escaped '*') without
# relying on the invalid '\*' escape sequence.
st.markdown(
    '<br>'
    '<h4>Citation</h4>'
    'Jagota, M.\\*, Ye, C.\\*, Rastogi, R., Albors, C., Koehl, A., Ioannidis, N., and Song, Y.S.†'
    '"Cross-protein transfer learning substantially improves zero-shot prediction of disease variant effects", '
    'bioRxiv (2022)<br>'
    '\\*These authors contributed equally to this work.<br>'
    '† To whom correspondence should be addressed: yss@berkeley.edu<br>'
    'DOI: https://doi.org/10.1101/2022.11.15.516532',
    unsafe_allow_html=True)