|
import streamlit as st |
|
import pandas as pd |
|
from os import listdir |
|
import plotly.graph_objects as go |
|
|
|
# Every per-protein score file is named '<gene>_HUMAN.csv.gz'; the loader
# further down rebuilds the file path from the gene name plus this suffix.
_SCORE_FILE_SUFFIX = '_HUMAN.csv.gz'


def _list_gene_names(directory):
    """Return the sorted gene names that have a score file in *directory*.

    Only entries ending in ``_HUMAN.csv.gz`` are considered, so stray files
    (for example ``.DS_Store`` or a README) never show up as selectable genes.
    The gene name is obtained by stripping that suffix, which keeps it
    consistent with how the path is reconstructed when the scores are read.
    ``os.listdir`` returns entries in arbitrary order, hence the sort.

    Raises whatever ``os.listdir`` raises if *directory* cannot be listed.
    """
    return sorted(
        file[:-len(_SCORE_FILE_SUFFIX)]
        for file in listdir(directory)
        if file.endswith(_SCORE_FILE_SUFFIX)
    )


# Genes available in each of the three score sets shipped with the app.
gene_names_eve = _list_gene_names('./CPT1_score_EVE_set/')
gene_names_no_eve1 = _list_gene_names('./CPT1_score_no_EVE_set_1/')
gene_names_no_eve2 = _list_gene_names('./CPT1_score_no_EVE_set_2/')
|
|
|
# Page header: tool name, tagline and a one-paragraph description of the data.
st.subheader('CPT-1')
st.markdown('Cross-protein transfer learning for variant effect prediction')
# The description is split across adjacent literals only for line length;
# Python joins them into a single string before the call.
st.markdown(
    'This is a lookup tool for the variant effect predictions of CPT-1 for '
    '18,602 human proteins, initially released with the manuscript '
    '"Cross-protein transfer learning substantially improves zero-shot '
    'prediction of disease variant effects (2022)".'
)
|
|
|
|
|
# Offer every gene that has a score file, listing the EVE set first and then
# the two non-EVE sets; the user's choice is kept in `gene`.
selectable_genes = gene_names_eve + gene_names_no_eve1 + gene_names_no_eve2
gene = st.selectbox(
    label='Which gene/protein are you interested in? (in UniProt gene names)',
    options=selectable_genes,
)
|
|
|
# Everything below runs only on the script run in which the button was clicked.
if st.button('Show results'):
    # Pick the score set that lists the selected gene; a gene found in neither
    # of the first two sets is read from the second non-EVE set.
    if gene in gene_names_eve:
        score_dir = './CPT1_score_EVE_set/'
    elif gene in gene_names_no_eve1:
        score_dir = './CPT1_score_no_EVE_set_1/'
    else:
        score_dir = './CPT1_score_no_EVE_set_2/'
    pred = pd.read_csv(score_dir + gene + '_HUMAN.csv.gz', compression='gzip')

    # --- Reshape the long table into a (mutant residue x position) matrix ---
    mat = pred.copy()
    # The last character of a 'mutant' label becomes the heatmap row; the rest
    # of the label is the column, shown on hover as "wild-type amino acid and
    # position".
    mat['Mutant amino acid'] = mat['mutant'].str[-1]
    mat['Position on protein sequence'] = mat['mutant'].str[:-1]
    mat = mat.set_index('mutant')
    # With 'mutant' moved into the index, the first remaining column is the
    # first non-'mutant' column of the CSV; that is the value being plotted.
    score_column = mat.columns[0]
    # Ordered categorical whose categories follow first appearance in the file
    # (presumably so the pivot keeps sequence order instead of sorting the
    # position labels as strings).
    mat['Position on protein sequence'] = pd.Categorical(
        mat['Position on protein sequence'],
        categories=mat['Position on protein sequence'].unique(),
        ordered=True,
    )
    mat = mat.pivot(
        index='Mutant amino acid',
        columns='Position on protein sequence',
        values=score_column,
    )

    # --- Heatmap ---
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            z=mat,
            y=mat.index,
            x=mat.columns,
            colorbar=dict(title='Variant effect'),
            # Blue -> white -> red diverging scale.
            colorscale=[
                [0, '#6FA8DC'],
                [0.3, '#CFE2F3'],
                [0.5, '#FFFFFF'],
                [0.7, '#F4CCCC'],
                [1.0, '#BA1111'],
            ],
            # Adjacent literals instead of backslash continuations inside one
            # literal, so no source indentation ends up in the template.
            hovertemplate=(
                'Wild-type amino acid and position: %{x}'
                '<br>Mutant amino acid: %{y}'
                '<br>Prediction: %{z}<extra></extra>'
            ),
        )
    )
    fig.update_layout(
        title_text="CPT-1 variant effect prediction for " + gene + " (red: pathogenic, blue: benign)",
        xaxis=dict(
            title='Position on protein sequence',
            rangeslider=dict(visible=True),
        ),
        yaxis=dict(
            title='Mutant amino acid',
        ),
        # Ask for one tick per row so every mutant amino acid can be labelled.
        yaxis_nticks=mat.shape[0],
        height=600,
    )
    st.plotly_chart(fig, theme="streamlit", use_container_width=True, height=600)

    # --- Download of the untouched prediction table ---
    # NOTE(review): clicking this button presumably triggers a Streamlit rerun
    # in which st.button('Show results') is False again, so the chart would
    # disappear after a download -- confirm, and consider st.session_state.
    st.download_button(
        label='Download CSV',
        data=pred.set_index('mutant').to_csv().encode('utf-8'),
        file_name=gene + '_CPT_score.csv',
        mime='text/csv',
    )
|
|
|
|
|
# Citation footer, rendered as one Markdown string with inline HTML.
# The fragments containing '\*' are raw strings: the backslash has to reach
# Markdown to escape the asterisk, and '\*' inside a normal literal is an
# invalid escape sequence (a warning today, slated to become an error).
# A space after the dagger separates the author list from the quoted title.
st.markdown(
    '</br>'
    '<h5> Citation </h5>'
    r'Jagota, M.\*, Ye, C.\*, Rastogi, R., Albors, C., Koehl, A., Ioannidis, N., and Song, Y.S.† '
    '"Cross-protein transfer learning substantially improves zero-shot prediction of disease variant effects", '
    'bioRxiv (2022) </br>'
    r'\*These authors contributed equally to this work. </br>'
    '† To whom correspondence should be addressed: yss@berkeley.edu </br>'
    'DOI: https://doi.org/10.1101/2022.11.15.516532',
    unsafe_allow_html=True,
)
|
|