diff-tol / app.py
Grant
revert
e152613
import streamlit as st
import pandas as pd
import numpy as np
import time
import plotly.graph_objects as go
from scipy.ndimage import gaussian_filter1d
from zipfile import ZipFile
# Pin NumPy's global random state for this script run.
np.random.seed(2024)

# Dropdown options: the TSV's first row is treated as data (header=None)
# and its column is exposed as `uids.selection`.
uids = pd.read_csv(
    "uniprot_ids_isoforms.tsv.gz",
    sep="\t",
    header=None,
    names=["selection"],
)

# Archive holding one "<uid>.csv" member per entry; it is opened once here
# and read by plot_interactive_scatter.
zf = ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip")

width = 600
def plot_interactive_scatter(uid: str):
    """Load one entry's scores and build its scatter trace.

    Reads the member ``{uid}.csv`` from the module-level archive ``zf`` and
    plots ``-log10(aPLLR)`` on the x axis against ``avg_LLR`` on the y axis,
    using the ``site`` column as hover text.

    Args:
        uid: Base name (without ``.csv``) of the archive member to load.

    Returns:
        A ``(trace, data)`` tuple: the orange, markers-only ``go.Scatter``
        and the DataFrame it was built from.
    """
    # pandas does not close file objects handed to it, so close the archive
    # member explicitly once the CSV has been parsed.
    with zf.open(f"{uid}.csv") as member:
        user_data = pd.read_csv(member)

    # Create scatter plot for user-specified data
    user_trace = go.Scatter(
        x=-np.log10(user_data.aPLLR),
        y=user_data.avg_LLR,
        mode='markers',
        name=f"{uid}<br>Data",
        text=user_data.site,
        hoverinfo='text',
        marker=dict(color='orange'),
    )
    return user_trace, user_data
def plot_interactive_line(uid_data: pd.DataFrame, uid: str, score: str, mutation: str,
                          hline1: float, hline2: float):
    """Build a per-position line figure for one score column.

    Args:
        uid_data: Table with a ``site`` column and the column named by
            ``score``.
        uid: Identifier shown in the figure title.
        score: Column to plot. ``"aPLLR"`` is plotted as ``-log10`` of the
            column; any other column is plotted unchanged.
        mutation: Label used in the title and the y-axis title.
        hline1: y value of the first dashed horizontal reference line.
        hline2: y value of the second dashed horizontal reference line.

    Returns:
        A ``go.Figure`` containing the line trace and both reference lines.
    """
    plot_data = -np.log10(uid_data[score]) if score == "aPLLR" else uid_data[score]

    # Hover shows "<site>: <value rounded to 3 decimals>" for each point.
    hover_text = [f"{x}: {np.round(y, 3)}" for x, y in zip(uid_data.site, plot_data)]

    line_trace = go.Scatter(
        x=np.arange(1, len(uid_data)+1),  # 1-based row position
        y=plot_data,
        mode='lines',
        text=hover_text,
        hoverinfo='text',
        marker=dict(color='orange')
    )
    line_fig = go.Figure(data=[line_trace])
    line_fig.update_layout(
        title=f"{uid} {mutation} Scores by Position",
        yaxis_title=f'{mutation} Score<br>(More Negative = More Damaging)',
        yaxis=dict(showgrid=False, zeroline=False, showline=False),
        height=300,
        hoverlabel=dict(  # Set hover label font size
            font=dict(size=16)  # Specify the font size of the hover text
        )
    )

    # xref='paper' with x0=0, x1=1 stretches each line across the full plot
    # width regardless of the data range.
    for hline in [hline1, hline2]:
        line_fig.add_shape(
            type='line',
            x0=0, x1=1, y0=hline, y1=hline,
            xref='paper', yref='y',
            line=dict(color='Black', dash='dash'),
        )
    return line_fig
# Streamlit discourages an empty widget label (it may become an error in a
# future release), so give the dropdown a real label for assistive technology
# and collapse it to keep the label-free layout.
selection = st.selectbox(
    "Select a UniProt ID",
    uids.selection,
    index=26592,  # row of `uids` that is pre-selected on first load
    label_visibility="collapsed",
)
# Options are comma-separated strings; the first field is the ID used for the
# per-entry CSV name, plot titles and download file name.
selection_uid = selection.split(",")[0]
# Background layer: sampled genome-wide scores drawn as grey markers.
base_data = pd.read_csv("rand_samp_gw_del_sub.csv.gz")

base_x = -np.log10(base_data.aPLLR)
base_y = base_data.avg_LLR
base_trace = go.Scatter(
    x=base_x,
    y=base_y,
    mode="markers",
    name="Sample of<br>Genome-Wide<br>Data",
    hoverinfo="none",  # no hover tooltips for the background points
    marker={"color": "grey"},
)
# Trace and table for the entry chosen in the dropdown.
ut, ud = plot_interactive_scatter(selection_uid)

# Background sample first, selected entry drawn on top of it.
fig = go.Figure(data=[base_trace, ut])

# Titles, legend box and hover font.
legend_style = {
    "font": {"size": 15},  # legend text size
    "bordercolor": "grey",
    "borderwidth": 1,
}
hover_style = {"font": {"size": 16}}  # hover text size
fig.update_layout(
    title="Deletion v Substitution Effects",
    xaxis_title="Deletion Score",
    yaxis_title="Substitution Score",
    yaxis={"showgrid": False, "showline": False, "zeroline": False},
    legend=legend_style,
    hoverlabel=hover_style,
)
fig.update_yaxes(showgrid=False)
# Percentile cutoffs are hard-coded rather than computed here, to avoid
# reading the entire dataset into memory.
del_bot, del_top = 0.147907659054341, -0.8033614237502615
sub_bot, sub_top = -12.294105263157894, -4.898842105263157

# Vertical lines at the deletion cutoffs, spanning the full plot height.
for x_cut in (del_bot, del_top):
    fig.add_shape(
        type="line",
        xref="x", yref="paper",
        x0=x_cut, x1=x_cut,
        y0=0, y1=1,
        line={"color": "Black", "width": 2},
    )

# Horizontal lines at the substitution cutoffs, spanning the full plot width.
for y_cut in (sub_bot, sub_top):
    fig.add_shape(
        type="line",
        xref="paper", yref="y",
        x0=0, x1=1,
        y0=y_cut, y1=y_cut,
        line={"color": "Black", "width": 2},
    )
# Corner labels for the two discordant regions: (x, y, label, colour).
quadrant_labels = (
    (2.5, -18, "D<sup>+</sup>S<sup>—</sup>", "green"),
    (-1.5, 0.5, "D<sup>—</sup>S<sup>+</sup>", "red"),
)
for label_x, label_y, label_text, label_color in quadrant_labels:
    fig.add_annotation(
        x=label_x,
        y=label_y,
        text=label_text,
        font={"color": label_color, "size": 24},
        showarrow=False,
    )
# Per-position line figures, each with its own pair of cutoff lines.
lt_apllr = plot_interactive_line(
    ud, selection_uid, "aPLLR", "Deletion", del_bot, del_top
)
lt_llr = plot_interactive_line(
    ud, selection_uid, "avg_LLR", "Substitution", sub_bot, sub_top
)

# The scatter plot is always shown.
st.plotly_chart(fig)

# The two line plots are shown only when the box is ticked.
show_line_plots = st.checkbox("Show Deletion and Substitution Effects Alone")
if show_line_plots:
    for line_fig in (lt_apllr, lt_llr):
        st.plotly_chart(line_fig)

# Offer the selected entry's three score columns as a CSV download.
export_columns = ["site", "aPLLR", "avg_LLR"]
csv_payload = ud.reset_index(drop=True)[export_columns].to_csv()
st.download_button(
    label=f"Download {selection_uid} data as CSV",
    data=csv_payload,
    file_name=f"{selection_uid}_del_sub.csv",
    mime="text/csv",
)
# Static help text rendered below the plots.
readme_text = """
**README**:
- Deletion scores are *visualized* on the -log10 scale.
- The genome-wide dataset can be downloaded by clicking [here](https://huggingface.co/spaces/goldmangrant/diff-tol/blob/main/ALL_hum_isoforms_ESM1b_del_sub.zip) (or go to files tab).
- Non-aggregated substitution effects can be downloaded or browsed [here](https://huggingface.co/spaces/ntranoslab/esm_variants).
- Additional supplementary data from the paper can be downloaded [here](https://github.com/ntranoslab/diff-tol).
"""
st.markdown(readme_text)