import streamlit as st
import pandas as pd
from os import listdir
import plotly.graph_objects as go
def _gene_names_in(score_dir, suffix='_HUMAN.csv.gz'):
    """Return the sorted gene names that have a score file in *score_dir*.

    Only entries named ``<gene><suffix>`` are considered, so stray directory
    entries (hidden files, READMEs, ...) never show up as selectable genes,
    and the returned name is exactly what must be joined with the same suffix
    to rebuild the file name when the scores are loaded.  ``listdir`` returns
    entries in arbitrary order, hence the explicit sort.
    """
    return sorted(
        name[:-len(suffix)]
        for name in listdir(score_dir)
        if name.endswith(suffix)
    )


# Gene names available in each of the three CPT-1 score directories.
gene_names_eve = _gene_names_in('./CPT1_score_EVE_set/')
gene_names_no_eve1 = _gene_names_in('./CPT1_score_no_EVE_set1/')
gene_names_no_eve2 = _gene_names_in('./CPT1_score_no_EVE_set2/')
# Page header: title, tagline, and a short description of what the tool offers.
st.subheader('CPT-1')
st.markdown('Cross-protein transfer learning for variant effect prediction')
st.markdown('This is a lookup tool for the variant effect predictions of CPT-1 for 18,602 human proteins, initially released with the manuscript "Cross-protein transfer learning substantially improves zero-shot prediction of disease variant effects (2022)".')
# Input query gene
# The dropdown offers every gene from the three score sets, in set order
# (EVE set first, then the two no-EVE sets).
gene = st.selectbox(
    label='Which gene/protein are you interested in?',
    options=[*gene_names_eve, *gene_names_no_eve1, *gene_names_no_eve2],
)
if st.button('Show results'):
# Read the gene file
# Choose the score directory whose listing contains the selected gene; the
# second no-EVE set is the fallback when neither earlier set matches.
if gene in gene_names_eve:
    score_dir = './CPT1_score_EVE_set/'
elif gene in gene_names_no_eve1:
    score_dir = './CPT1_score_no_EVE_set1/'
else:
    score_dir = './CPT1_score_no_EVE_set2/'
pred = pd.read_csv(score_dir + gene + '_HUMAN.csv.gz', compression='gzip')
# Plot heatmap
# Reshape the per-variant table into a grid with one row per mutant amino acid
# and one column per sequence position, ready to be used as heatmap values.
aa_label = 'Mutant amino acid'
position_label = 'Position on protein sequence'

mat = pred.copy()
mutant_codes = mat['mutant']
# The last character of each 'mutant' string is the substituted residue;
# everything before it is used as the position label.
mat[aa_label] = mutant_codes.str[-1]
mat[position_label] = mutant_codes.str[:-1]
mat = mat.set_index('mutant')

# Ordered categorical with categories in order of first appearance --
# presumably so the pivoted columns follow the file order instead of being
# sorted as plain strings.
position_values = mat[position_label]
mat[position_label] = pd.Categorical(
    position_values,
    categories=position_values.unique(),
    ordered=True,
)

# First column left after 'mutant' became the index; assumed to hold the
# prediction score -- TODO confirm against the CSV layout.
score_column = mat.columns[0]
mat = mat.pivot(index=aa_label, columns=position_label, values=score_column)
fig = go.Figure()
fig.add_trace(
go.Heatmap(z = mat, y = mat.index, x = mat.columns,
colorbar = dict(title='Variant effect'),
colorscale=[[0, '#6FA8DC'],
[0.3, '#CFE2F3'],
[0.5, '#FFFFFF'],
[0.7, '#F4CCCC'],
[1.0, '#BA1111']],
hovertemplate='Wild-type amino acid and position: %{x}\
Mutant amino acid: %{y}\
Prediction: %{z}