Spaces:

ntranoslab
/

diff-tol

Running

App Files Files Community

Grant committed on Apr 17

Commit

27f6851

1 Parent(s): 9b8ec97

initial commit

Browse files

Files changed (6) hide show

.gitattributes +3 -0
ALL_hum_proteins_ESM1b_del_sub.zip +3 -0
app.py +173 -0
rand_samp_gw_del_sub.csv.gz +3 -0
requirements.txt +6 -0
uniprot_ids.tsv.gz +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ALL_hum_proteins_ESM1b_del_sub.zip filter=lfs diff=lfs merge=lfs -text
+rand_samp_gw_del_sub.csv.gz filter=lfs diff=lfs merge=lfs -text
+uniprot_ids.tsv.gz filter=lfs diff=lfs merge=lfs -text

ALL_hum_proteins_ESM1b_del_sub.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37ae869590a649ac957f42fe2aec0d7f7c59890aa274dfa48187031ebf164189
+size 463673322

app.py ADDED Viewed

	@@ -0,0 +1,173 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import time
+import plotly.graph_objects as go
+from scipy.ndimage import gaussian_filter1d
+from zipfile import ZipFile
+np.random.seed(2024)
+uids = pd.read_csv("uniprot_ids.tsv.gz", names=["selection"], header=None, sep="\t")
+# del_sub_merge = pd.read_csv("del_sub_data.csv.gz")
+zf = ZipFile("ALL_hum_isoforms_ESM1b_del_sub.zip")
+width=600
+def plot_interactive_scatter(uid: str):
+    user_data = pd.read_csv(zf.open(f"{uid}.csv"))
+    # Create scatter plot for user-specified data
+    user_trace = go.Scatter(
+    x=-np.log10(user_data.aPLLR),
+    y=user_data.avg_LLR,
+    mode='markers',
+    name=f"{uid}<br>Data",
+    text=user_data.site,
+    hoverinfo='text',
+    marker=dict(color='orange'))
+    return user_trace, user_data
+def plot_interactive_line(uid_data: pd.DataFrame, uid: str, score: str, mutation: str,
+                          hline1: float, hline2: float):
+    esm_data = -np.log10(uid_data[score]) if score == "aPLLR" else uid_data[score]
+    x_ticks = uid_data["site"].tolist()
+    plot_data = esm_data
+    hover_text = [f"{x}: {np.round(y, 3)}" for x, y in zip(uid_data.site, plot_data)]
+    line_trace = go.Scatter(
+        x=np.arange(1, len(uid_data)+1),
+        y=plot_data,
+        mode='lines',
+        text=hover_text,
+        hoverinfo='text',
+        marker=dict(color='orange')
+    )
+    line_fig = go.Figure(data=[line_trace])
+    line_fig.update_layout(
+        title=f"{uid} {mutation} Scores by Position",
+        yaxis_title=f'{mutation} Score<br>(More Negative = More Damaging)',
+        yaxis=dict(showgrid=False, zeroline=False, showline=False),
+        height=300,
+        hoverlabel=dict(  # Set hover label font size
+            font=dict(size=16)  # Specify the font size of the hover text
+        )
+    )
+    for hline in [hline1, hline2]:
+        line_fig.add_shape(
+            type='line',
+            x0=0, x1=1, y0=hline, y1=hline,
+            xref='paper', yref='y',
+            line=dict(color='Black', dash='dash'),
+        )
+    return line_fig
+selection = st.selectbox("", uids.selection, index=26592)
+selection_uid = selection.split(",")[0]
+# Base dataset
+base_data = pd.read_csv("rand_samp_gw_del_sub.csv.gz")
+# Create base scatter plot
+base_trace = go.Scatter(
+    x=-np.log10(base_data.aPLLR),
+    y=base_data.avg_LLR,
+    mode='markers',
+    name='Sample of<br>Genome-Wide<br>Data',
+    hoverinfo='none', # Disable hover information for the base data
+    marker=dict(color='grey')
+)
+# User-specified data
+ut, ud = plot_interactive_scatter(selection_uid)
+# Combine traces
+fig = go.Figure([base_trace, ut])
+# Customize layout
+fig.update_layout(
+    title='Deletion v Substitution Effects',
+    xaxis_title='Deletion Score',
+    yaxis_title='Substitution Score',
+    yaxis=dict(showgrid=False, showline=False, zeroline=False),
+    legend=dict(
+        font=dict(size=15), # Specify the font size of the legend text
+        bordercolor="grey",
+        borderwidth=1
+    ),
+    hoverlabel=dict(  # Set hover label font size
+        font=dict(size=16)  # Specify the font size of the hover text
+    )
+)
+fig.update_yaxes(showgrid=False)
+# Extract out percentiles
+del_bot, del_top =  0.16500809479645437, -0.7801050825906862
+for del_cutoff in [del_bot, del_top]:
+    fig.add_shape(
+        type='line',
+        x0=del_cutoff, x1=del_cutoff, y0=0, y1=1,
+        xref='x', yref='paper',
+        line=dict(color='Black', width=2)
+    )
+# to avoid reading the entire dataset into memory
+sub_bot, sub_top = -12.004105263157896, -4.871947368421053
+for sub_cutoff in [sub_bot, sub_top]:
+    fig.add_shape(
+        type='line',
+        x0=0, x1=1, y0=sub_cutoff, y1=sub_cutoff,
+        xref='paper', yref='y',
+        line=dict(color='Black', width=2),
+    )
+fig.add_annotation(
+    x=2.5,
+    y=-18,
+    text=r"D<sup>+</sup>S<sup>—</sup>",
+    font=dict(color="green", size=24),
+    showarrow=False
+)
+fig.add_annotation(
+    x=-1.5,
+    y=0.5,
+    text=r"D<sup>—</sup>S<sup>+</sup>",
+    font=dict(color="red", size=24),
+    showarrow=False
+)
+lt_apllr = plot_interactive_line(ud, selection_uid, "aPLLR", "Deletion", del_bot, del_top)
+lt_llr = plot_interactive_line(ud, selection_uid, "avg_LLR", "Substitution", sub_bot, sub_top)
+# Show the scatter plot
+st.plotly_chart(fig)
+show_line_plots = st.checkbox("Show Deletion and Substitution Effects Alone")
+if show_line_plots:
+    st.plotly_chart(lt_apllr)
+    st.plotly_chart(lt_llr)
+st.download_button(
+    label=f"Download {selection_uid} data as CSV",
+    data=ud.reset_index(drop=True)[["site", "aPLLR", "avg_LLR"]].to_csv(),
+    file_name = f"{selection_uid}_del_sub.csv",
+    mime='text/csv'
+)
+st.markdown("""
+**README**:
+- Deletion scores are *visualized* on the -log10 scale.
+- The genome-wide dataset can be downloaded by clicking [here](https://huggingface.co/spaces/goldmangrant/diff-tol/blob/main/ALL_hum_isoforms_ESM1b_del_sub.zip) (or go to files tab).
+- Non-aggregated substitution effects can be downloaded or browsed [here](https://huggingface.co/spaces/ntranoslab/esm_variants).
+- Additional supplementary data from the paper can be downloaded [here](https://github.com/ntranoslab/diff-tol).
+""")

rand_samp_gw_del_sub.csv.gz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e734bd249ed357c18da17a266dc6a1b711eb63753ef1b7e6a8da3b31e41aa73b
+size 237298

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastapi
+uvicorn[standard]
+pandas
+plotly
+numpy
+scipy

uniprot_ids.tsv.gz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e211fb640e9f114e3ee026da3a27dcc9a4fefe8ecf25d558ea69a7d5323eb76a
+size 198728