Spaces:

songlab
/

CPT

Sleeping

CPT / app.py

cz-ye

Minor fix

bd3d7ad over 1 year ago

3.66 kB

	import streamlit as st
	import pandas as pd
	from os import listdir
	import plotly.graph_objects as go

	def _gene_names(score_dir, suffix='_HUMAN.csv.gz'):
	    """Return the sorted gene names that have a score file in *score_dir*.

	    Only files named ``<gene><suffix>`` are considered, because that is the
	    exact name the results code rebuilds when it loads a gene; any other
	    directory entry could never be read back and is skipped.
	    The result is sorted because ``listdir`` returns entries in arbitrary
	    order, which would otherwise make the selection list unstable.
	    """
	    return sorted(
	        name[:-len(suffix)]
	        for name in listdir(score_dir)
	        if name.endswith(suffix)
	    )


	# One list per score directory; the results code uses list membership to
	# decide which directory a selected gene is loaded from.
	gene_names_eve = _gene_names('./CPT1_score_EVE_set/')
	gene_names_no_eve1 = _gene_names('./CPT1_score_no_EVE_set_1/')
	gene_names_no_eve2 = _gene_names('./CPT1_score_no_EVE_set_2/')

	# Page header: tool name, tagline, and a short description of the lookup.
	st.subheader('CPT-1')
	st.markdown('Cross-protein transfer learning for variant effect prediction')
	st.markdown(
	    'This is a lookup tool for the variant effect predictions of CPT-1 for 18,602 human proteins, '
	    'initially released with the manuscript "Cross-protein transfer learning substantially improves '
	    'zero-shot prediction of disease variant effects (2022)".'
	)

	# Query input: a single drop-down offering the genes of all three score sets,
	# in the order EVE set, no-EVE set 1, no-EVE set 2.
	gene = st.selectbox(
	    label='Which gene/protein are you interested in?',
	    options=[*gene_names_eve, *gene_names_no_eve1, *gene_names_no_eve2],
	)

	# Load, plot and offer the scores for download only once the user asks for them.
	if st.button('Show results'):
	    # Read the gene file: pick the directory whose listing contains the gene.
	    if gene in gene_names_eve:
	        score_dir = './CPT1_score_EVE_set/'
	    elif gene in gene_names_no_eve1:
	        score_dir = './CPT1_score_no_EVE_set_1/'
	    else:
	        score_dir = './CPT1_score_no_EVE_set_2/'
	    pred = pd.read_csv(score_dir + gene + '_HUMAN.csv.gz', compression='gzip')

	    # Plot heatmap
	    mat = pred.copy()
	    # Split each 'mutant' label into its last character (the mutant amino
	    # acid) and everything before it (shown on the x axis and described in
	    # the hover text as wild-type amino acid and position).
	    mat['Mutant amino acid'] = mat['mutant'].str[-1]
	    mat['Position on protein sequence'] = mat['mutant'].str[:-1]
	    mat = mat.set_index('mutant')
	    # Ordered categorical with categories in order of first appearance, so
	    # the pivoted columns follow the file order rather than string order.
	    mat['Position on protein sequence'] = pd.Categorical(
	        mat['Position on protein sequence'],
	        categories=mat['Position on protein sequence'].unique(),
	        ordered=True,
	    )
	    # The plotted value is the first column left after 'mutant' became the
	    # index -- presumably the CPT-1 score; confirm against the CSV schema.
	    mat = mat.pivot(
	        index='Mutant amino acid',
	        columns='Position on protein sequence',
	        values=mat.columns[0],
	    )

	    fig = go.Figure()
	    fig.add_trace(
	        go.Heatmap(
	            z=mat,
	            y=mat.index,
	            x=mat.columns,
	            colorbar=dict(title='Variant effect'),
	            colorscale=[
	                [0, '#6FA8DC'],
	                [0.3, '#CFE2F3'],
	                [0.5, '#FFFFFF'],
	                [0.7, '#F4CCCC'],
	                [1.0, '#BA1111'],
	            ],
	            # Adjacent literals instead of backslash-newline continuation:
	            # a continuation inside a string keeps the next line's leading
	            # whitespace as part of the hover text.
	            hovertemplate=(
	                'Wild-type amino acid and position: %{x}'
	                '<br>Mutant amino acid: %{y}'
	                '<br>Prediction: %{z}<extra></extra>'
	            ),
	        )
	    )
	    fig.update_layout(
	        title_text="CPT-1 variant effect prediction for " + gene + " (red: pathogenic, blue: benign)",
	        xaxis=dict(
	            title='Position on protein sequence',
	            rangeslider=dict(visible=True),
	        ),
	        yaxis=dict(
	            title='Mutant amino acid',
	        ),
	        # One tick per heatmap row so every mutant amino acid is labelled.
	        yaxis_nticks=mat.shape[0],
	        height=600,
	    )
	    st.plotly_chart(fig, theme="streamlit", use_container_width=True, height=600)

	    # Generate download link: the table as read, re-indexed by 'mutant',
	    # serialized as uncompressed UTF-8 CSV.
	    st.download_button(
	        'Download CSV',
	        pred.set_index('mutant').to_csv().encode('utf-8'),
	        gene + '_CPT_score.csv',
	        'text/csv',
	    )

	# Citation
	# The backslashes below are Markdown escapes, not Python escapes, so the
	# literals that contain them are raw strings; a plain '\,' or '\*' is an
	# invalid Python escape sequence and triggers a warning on recent versions.
	st.markdown(
	    '</br>'
	    '<h5> Citation </h5>'
	    r'Jagota, M.\, Ye, C.\, Rastogi, R., Albors, C., Koehl, A., Ioannidis, N., and Song, Y.S.&dagger; '
	    '"Cross-protein transfer learning substantially improves zero-shot prediction of disease variant effects", '
	    'bioRxiv (2022) </br>'
	    r'\*These authors contributed equally to this work. </br>'
	    '&dagger; To whom correspondence should be addressed: yss@berkeley.edu </br>'
	    'DOI: https://doi.org/10.1101/2022.11.15.516532',
	    unsafe_allow_html=True,
	)