Grant committed on
Commit
27f6851
·
1 Parent(s): 9b8ec97

initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ALL_hum_proteins_ESM1b_del_sub.zip filter=lfs diff=lfs merge=lfs -text
37
+ rand_samp_gw_del_sub.csv.gz filter=lfs diff=lfs merge=lfs -text
38
+ uniprot_ids.tsv.gz filter=lfs diff=lfs merge=lfs -text
ALL_hum_proteins_ESM1b_del_sub.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ae869590a649ac957f42fe2aec0d7f7c59890aa274dfa48187031ebf164189
3
+ size 463673322
app.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import time
5
+ import plotly.graph_objects as go
6
+ from scipy.ndimage import gaussian_filter1d
7
+ from zipfile import ZipFile
8
+
9
# Seed NumPy's global RNG once at import time. No random draws are visible in
# this script; the seed is kept so any added sampling stays reproducible.
np.random.seed(2024)

# Entries offered in the protein picker, read into a single column named
# "selection" (the file has no header row).
uids = pd.read_csv("uniprot_ids.tsv.gz", names=["selection"], header=None, sep="\t")
# del_sub_merge = pd.read_csv("del_sub_data.csv.gz")

# Archive with one "<uid>.csv" member per protein. The archive committed next
# to this script (and tracked in .gitattributes) is
# ALL_hum_proteins_ESM1b_del_sub.zip; the former "isoforms" file name is not
# part of the repository, so opening it raised FileNotFoundError at startup.
zf = ZipFile("ALL_hum_proteins_ESM1b_del_sub.zip")

width = 600
16
+
17
def plot_interactive_scatter(uid: str):
    """Build the highlighted scatter trace for a single protein.

    Reads the member ``{uid}.csv`` from the module-level archive ``zf`` and
    plots ``-log10(aPLLR)`` on the x axis against ``avg_LLR`` on the y axis,
    using the ``site`` column as hover text.

    Args:
        uid: Name (without the ``.csv`` suffix) of the archive member to load.

    Returns:
        A ``(trace, data)`` tuple: the ``go.Scatter`` trace and the DataFrame
        that was read from the archive.

    Raises:
        KeyError: If the archive has no member named ``{uid}.csv``.
    """
    # Close the archive member as soon as it is parsed; the script body is
    # re-executed on every Streamlit interaction, so an unclosed handle would
    # accumulate one open member per rerun.
    with zf.open(f"{uid}.csv") as member:
        user_data = pd.read_csv(member)

    # Create scatter plot for user-specified data
    user_trace = go.Scatter(
        x=-np.log10(user_data.aPLLR),
        y=user_data.avg_LLR,
        mode='markers',
        name=f"{uid}<br>Data",
        text=user_data.site,
        hoverinfo='text',
        marker=dict(color='orange'))

    return user_trace, user_data
32
+
33
def plot_interactive_line(uid_data: pd.DataFrame, uid: str, score: str, mutation: str,
                          hline1: float, hline2: float):
    """Build a per-position line figure for one score column.

    Args:
        uid_data: Per-site table; must contain a ``site`` column and the
            column named by ``score``.
        uid: Identifier shown in the figure title.
        score: Column of ``uid_data`` to plot. ``"aPLLR"`` is plotted as
            ``-log10(value)``; any other column is plotted as stored.
        mutation: Label used in the title and y-axis title
            (e.g. ``"Deletion"``).
        hline1: y position of the first dashed horizontal reference line.
        hline2: y position of the second dashed horizontal reference line.

    Returns:
        A ``go.Figure`` holding the line trace and the two reference lines.
    """
    plot_data = -np.log10(uid_data[score]) if score == "aPLLR" else uid_data[score]
    hover_text = [f"{x}: {np.round(y, 3)}" for x, y in zip(uid_data.site, plot_data)]

    line_trace = go.Scatter(
        # Positions are numbered 1..N in row order rather than taken from "site".
        x=np.arange(1, len(uid_data)+1),
        y=plot_data,
        mode='lines',
        text=hover_text,
        hoverinfo='text',
        marker=dict(color='orange')
    )
    line_fig = go.Figure(data=[line_trace])
    line_fig.update_layout(
        title=f"{uid} {mutation} Scores by Position",
        yaxis_title=f'{mutation} Score<br>(More Negative = More Damaging)',
        yaxis=dict(showgrid=False, zeroline=False, showline=False),
        height=300,
        hoverlabel=dict(  # Set hover label font size
            font=dict(size=16)  # Specify the font size of the hover text
        )
    )
    # Dashed reference lines spanning the full plot width (x in paper
    # coordinates, y in data coordinates).
    for hline in [hline1, hline2]:
        line_fig.add_shape(
            type='line',
            x0=0, x1=1, y0=hline, y1=hline,
            xref='paper', yref='y',
            line=dict(color='Black', dash='dash'),
        )
    return line_fig
68
+
69
# Protein picker. The text before the first comma of the chosen entry is the
# key used to look the protein up in the archive.
chosen_entry = st.selectbox("", uids.selection, index=26592)
selection_uid = chosen_entry.partition(",")[0]

# Grey background cloud: a sample of the genome-wide data, with hover disabled
# so only the selected protein's points show tooltips.
background = pd.read_csv("rand_samp_gw_del_sub.csv.gz")
background_x = -np.log10(background.aPLLR)
background_trace = go.Scatter(
    x=background_x,
    y=background.avg_LLR,
    mode="markers",
    name="Sample of<br>Genome-Wide<br>Data",
    hoverinfo="none",
    marker={"color": "grey"},
)

# Trace and raw table for the selected protein.
ut, ud = plot_interactive_scatter(selection_uid)

# Background first so the selected protein is drawn on top of it.
fig = go.Figure(data=[background_trace, ut])

# Titles, a bordered legend, and larger hover text.
scatter_layout = {
    "title": "Deletion v Substitution Effects",
    "xaxis_title": "Deletion Score",
    "yaxis_title": "Substitution Score",
    "yaxis": {"showgrid": False, "showline": False, "zeroline": False},
    "legend": {
        "font": {"size": 15},
        "bordercolor": "grey",
        "borderwidth": 1,
    },
    "hoverlabel": {"font": {"size": 16}},
}
fig.update_layout(**scatter_layout)

fig.update_yaxes(showgrid=False)
108
+
109
# Percentile cutoffs are written in as constants to avoid reading the entire
# dataset into memory. (Which percentiles they correspond to is not shown
# here -- TODO confirm against the analysis that produced them.)
del_bot, del_top = 0.16500809479645437, -0.7801050825906862
sub_bot, sub_top = -12.004105263157896, -4.871947368421053

cutoff_line_style = {"color": "Black", "width": 2}

# Vertical lines at the deletion cutoffs: x in data units, y across the whole
# plot height.
for x_cut in (del_bot, del_top):
    fig.add_shape(
        type="line",
        xref="x", yref="paper",
        x0=x_cut, x1=x_cut, y0=0, y1=1,
        line=dict(cutoff_line_style),
    )

# Horizontal lines at the substitution cutoffs: y in data units, x across the
# whole plot width.
for y_cut in (sub_bot, sub_top):
    fig.add_shape(
        type="line",
        xref="paper", yref="y",
        x0=0, x1=1, y0=y_cut, y1=y_cut,
        line=dict(cutoff_line_style),
    )

# Quadrant labels: (x, y, label text, font colour).
quadrant_labels = (
    (2.5, -18, r"D<sup>+</sup>S<sup>—</sup>", "green"),
    (-1.5, 0.5, r"D<sup>—</sup>S<sup>+</sup>", "red"),
)
for label_x, label_y, label_text, label_colour in quadrant_labels:
    fig.add_annotation(
        x=label_x,
        y=label_y,
        text=label_text,
        font={"color": label_colour, "size": 24},
        showarrow=False,
    )
144
+
145
# Show the scatter plot
st.plotly_chart(fig)

show_line_plots = st.checkbox("Show Deletion and Substitution Effects Alone")

if show_line_plots:
    # Build the per-position figures only when they are going to be shown;
    # the script reruns on every interaction, so building them unconditionally
    # is wasted work whenever the box is unchecked.
    lt_apllr = plot_interactive_line(ud, selection_uid, "aPLLR", "Deletion", del_bot, del_top)
    lt_llr = plot_interactive_line(ud, selection_uid, "avg_LLR", "Substitution", sub_bot, sub_top)
    st.plotly_chart(lt_apllr)
    st.plotly_chart(lt_llr)

# Offer the selected protein's table (site, aPLLR, avg_LLR) as a CSV download.
st.download_button(
    label=f"Download {selection_uid} data as CSV",
    data=ud.reset_index(drop=True)[["site", "aPLLR", "avg_LLR"]].to_csv(),
    file_name=f"{selection_uid}_del_sub.csv",
    mime='text/csv'
)

# The genome-wide download link targets the archive that is actually committed
# to the repository (ALL_hum_proteins_ESM1b_del_sub.zip).
st.markdown("""
**README**:
- Deletion scores are *visualized* on the -log10 scale.
- The genome-wide dataset can be downloaded by clicking [here](https://huggingface.co/spaces/goldmangrant/diff-tol/blob/main/ALL_hum_proteins_ESM1b_del_sub.zip) (or go to files tab).
- Non-aggregated substitution effects can be downloaded or browsed [here](https://huggingface.co/spaces/ntranoslab/esm_variants).
- Additional supplementary data from the paper can be downloaded [here](https://github.com/ntranoslab/diff-tol).
""")
rand_samp_gw_del_sub.csv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e734bd249ed357c18da17a266dc6a1b711eb63753ef1b7e6a8da3b31e41aa73b
3
+ size 237298
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ pandas
4
+ plotly
5
+ numpy
6
+ scipy
uniprot_ids.tsv.gz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e211fb640e9f114e3ee026da3a27dcc9a4fefe8ecf25d558ea69a7d5323eb76a
3
+ size 198728