xinfyxinfy committed
Commit ca3430a
1 Parent(s): 62a66d7

Upload 16 files

.gitattributes CHANGED
@@ -32,3 +32,15 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ models/mcpas/bestmodel_alphabetapeptide.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/mcpas/bestmodel_alphabetaptptidemhc.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/mcpas/bestmodel_alphapeptide.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/mcpas/bestmodel_alphapeptidemhc.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/mcpas/bestmodel_betapeptide.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/mcpas/bestmodel_betapeptidemhc.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/vdjdb/bestmodel_alphabetapeptide.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/vdjdb/bestmodel_alphabetapeptidemhc.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/vdjdb/bestmodel_alphapeptide.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/vdjdb/bestmodel_alphapeptidemhc.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/vdjdb/bestmodel_betapeptide.hdf5 filter=lfs diff=lfs merge=lfs -text
+ models/vdjdb/bestmodel_betapeptidemhc.hdf5 filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,224 @@
+ import streamlit as st
+ import numpy as np
+ import pandas as pd
+ import os
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  ### run on CPU if the GPU causes issues
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+ from tensorflow.keras.models import load_model
+ import time
+ # from PIL import Image
+
+ st.set_page_config(page_title="TCR-ESM", page_icon="dna")
+
+ hide_streamlit_style = """
+ <style>
+ #root > div:nth-child(1) > div > div > div > div > section > div {padding-top: 2rem;}
+ </style>
+
+ """
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+
+ # image = Image.open('TCR-ESM.png')
+ # st.image(image)
+
+ st.title('TCR-ESM')
+ st.subheader('a webserver accompanying our work on predicting TCR-peptide-MHC binding with large protein language model (ESM1v) embeddings')
+
+ dataset = st.radio("Please select the Training Database", ('MCPAS', 'VDJDB'), horizontal=True)
+
+ task = st.radio("Please select the Prediction Task", ("TCR\u03B1-TCR\u03B2-Peptide-MHC", "TCR\u03B1-TCR\u03B2-Peptide", "TCR\u03B1-Peptide-MHC",
+                                                       "TCR\u03B2-Peptide-MHC", "TCR\u03B1-Peptide", "TCR\u03B2-Peptide"), horizontal=True)
+
+ with open("sample_input_data.zip", "rb") as file:
+     btn = st.download_button(label="Download Sample Input Data", data=file, file_name="sample_input_data.zip", mime="application/octet-stream")
+ # st.download_button('Download Sample Input Data', open('tcresm_sample_input.zip'))
+
+ ############## get numpy files
+ if task == "TCR\u03B1-TCR\u03B2-Peptide-MHC":
+     alpha = st.file_uploader("Choose the .npy file containing TCR\u03B1 Embeddings", key=101)
+     beta = st.file_uploader("Choose the .npy file containing TCR\u03B2 Embeddings", key=103)
+     pepti = st.file_uploader("Choose the .npy file containing Peptide Embeddings", key=109)
+     mhc = st.file_uploader("Choose the .npy file containing MHC Embeddings", key=113)
+     shorttask = 'abpm'
+     group = (alpha, beta, pepti, mhc)
+ elif task == "TCR\u03B1-TCR\u03B2-Peptide":
+     alpha = st.file_uploader("Choose the .npy file containing TCR\u03B1 Embeddings", key=127)
+     beta = st.file_uploader("Choose the .npy file containing TCR\u03B2 Embeddings", key=131)
+     pepti = st.file_uploader("Choose the .npy file containing Peptide Embeddings", key=137)
+     shorttask = 'abp'
+     group = (alpha, beta, pepti)
+ elif task == "TCR\u03B1-Peptide-MHC":
+     alpha = st.file_uploader("Choose the .npy file containing TCR\u03B1 Embeddings", key=139)
+     pepti = st.file_uploader("Choose the .npy file containing Peptide Embeddings", key=149)
+     mhc = st.file_uploader("Choose the .npy file containing MHC Embeddings", key=151)
+     shorttask = 'apm'
+     group = (alpha, pepti, mhc)
+ elif task == "TCR\u03B2-Peptide-MHC":
+     beta = st.file_uploader("Choose the .npy file containing TCR\u03B2 Embeddings", key=157)
+     pepti = st.file_uploader("Choose the .npy file containing Peptide Embeddings", key=163)
+     mhc = st.file_uploader("Choose the .npy file containing MHC Embeddings", key=167)
+     shorttask = 'bpm'
+     group = (beta, pepti, mhc)
+ elif task == "TCR\u03B1-Peptide":
+     alpha = st.file_uploader("Choose the .npy file containing TCR\u03B1 Embeddings", key=173)
+     pepti = st.file_uploader("Choose the .npy file containing Peptide Embeddings", key=179)
+     shorttask = 'ap'
+     group = (alpha, pepti)
+ elif task == "TCR\u03B2-Peptide":
+     beta = st.file_uploader("Choose the .npy file containing TCR\u03B2 Embeddings", key=181)
+     pepti = st.file_uploader("Choose the .npy file containing Peptide Embeddings", key=191)
+     shorttask = 'bp'
+     group = (beta, pepti)
+
+
+
+
+
+ ##################### ML predict function
+ @st.cache_data
+ def predict_on_batch_output(dataset, shorttask, group):
+
+     if dataset == 'MCPAS':
+         dataset = 'mcpas'
+     elif dataset == 'VDJDB':
+         dataset = 'vdjdb'
+
+
+     if dataset == 'mcpas' and shorttask == 'abp':
+         # load data
+         alpha, beta, pep = group
+         alpha_np, beta_np, pep_np = np.load(alpha), np.load(beta), np.load(pep)
+         # load model
+         model = load_model('models/mcpas/bestmodel_alphabetapeptide.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, beta_np, pep_np])
+     elif dataset == 'mcpas' and shorttask == 'abpm':
+         # load data
+         alpha, beta, pep, mhc = group
+         alpha_np, beta_np, pep_np, mhc_np = np.load(alpha), np.load(beta), np.load(pep), np.load(mhc)
+         # load model
+         model = load_model('models/mcpas/bestmodel_alphabetaptptidemhc.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, beta_np, pep_np, mhc_np])
+     elif dataset == 'mcpas' and shorttask == 'ap':
+         # load data
+         alpha, pep = group
+         alpha_np, pep_np = np.load(alpha), np.load(pep)
+         # load model
+         model = load_model('models/mcpas/bestmodel_alphapeptide.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, pep_np])
+     elif dataset == 'mcpas' and shorttask == 'bp':
+         # load data
+         beta, pep = group
+         beta_np, pep_np = np.load(beta), np.load(pep)
+         # load model
+         model = load_model('models/mcpas/bestmodel_betapeptide.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([beta_np, pep_np])
+     elif dataset == 'mcpas' and shorttask == 'apm':
+         # load data
+         alpha, pep, mhc = group
+         alpha_np, pep_np, mhc_np = np.load(alpha), np.load(pep), np.load(mhc)
+         # load model
+         model = load_model('models/mcpas/bestmodel_alphapeptidemhc.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, pep_np, mhc_np])
+     elif dataset == 'mcpas' and shorttask == 'bpm':
+         # load data
+         beta, pep, mhc = group
+         beta_np, pep_np, mhc_np = np.load(beta), np.load(pep), np.load(mhc)
+         # load model
+         model = load_model('models/mcpas/bestmodel_betapeptidemhc.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([beta_np, pep_np, mhc_np])
+     elif dataset == 'vdjdb' and shorttask == 'abp':
+         # load data
+         alpha, beta, pep = group
+         alpha_np, beta_np, pep_np = np.load(alpha), np.load(beta), np.load(pep)
+         # load model
+         model = load_model('models/vdjdb/bestmodel_alphabetapeptide.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, beta_np, pep_np])
+     elif dataset == 'vdjdb' and shorttask == 'abpm':
+         # load data
+         alpha, beta, pep, mhc = group
+         alpha_np, beta_np, pep_np, mhc_np = np.load(alpha), np.load(beta), np.load(pep), np.load(mhc)
+         # load model
+         model = load_model('models/vdjdb/bestmodel_alphabetapeptidemhc.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, beta_np, pep_np, mhc_np])
+     elif dataset == 'vdjdb' and shorttask == 'ap':
+         # load data
+         alpha, pep = group
+         alpha_np, pep_np = np.load(alpha), np.load(pep)
+         # load model
+         model = load_model('models/vdjdb/bestmodel_alphapeptide.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, pep_np])
+     elif dataset == 'vdjdb' and shorttask == 'bp':
+         # load data
+         beta, pep = group
+         beta_np, pep_np = np.load(beta), np.load(pep)
+         # load model
+         model = load_model('models/vdjdb/bestmodel_betapeptide.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([beta_np, pep_np])
+     elif dataset == 'vdjdb' and shorttask == 'apm':
+         # load data
+         alpha, pep, mhc = group
+         alpha_np, pep_np, mhc_np = np.load(alpha), np.load(pep), np.load(mhc)
+         # load model
+         model = load_model('models/vdjdb/bestmodel_alphapeptidemhc.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([alpha_np, pep_np, mhc_np])
+     elif dataset == 'vdjdb' and shorttask == 'bpm':
+         # load data
+         beta, pep, mhc = group
+         beta_np, pep_np, mhc_np = np.load(beta), np.load(pep), np.load(mhc)
+         # load model
+         model = load_model('models/vdjdb/bestmodel_betapeptidemhc.hdf5', compile=False)
+         # predict_on_batch
+         output = model.predict_on_batch([beta_np, pep_np, mhc_np])
+
+     # return np.around(output.squeeze(), 4)
+
+     val = np.squeeze(output)
+     return val
+
+ @st.cache_data
+ def convert_df(df):
+     # IMPORTANT: Cache the conversion to prevent computation on every rerun
+     return df.to_csv().encode('utf-8')
+
+
+ #####################
+ if st.button('Submit'):
+     # with st.spinner('Wait for it...'):
+     #     time.sleep(0.5)
+     # res = predict_on_batch_output(dataset, shorttask, group)
+     # st.write("Binding Probabilities")
+     # st.dataframe(np.round(res, 4))
+     # csv = convert_df(pd.DataFrame(np.round(res, 4), columns=['output']))
+     # st.download_button(label="Download Predictions", data=csv, file_name='tcresm_predictions.csv', mime='text/csv')
+     try:
+         res = predict_on_batch_output(dataset, shorttask, group)
+         with st.spinner('Calculating ...'):
+             time.sleep(0.5)
+         st.write("Binding Probabilities")
+         st.dataframe(np.round(res, 4), use_container_width=True, height=500)
+         csv = convert_df(pd.DataFrame(np.round(res, 4), columns=['output']))
+         st.download_button(label="Download Predictions", data=csv, file_name='tcresm_predictions.csv', mime='text/csv')
+     except Exception:
+         st.error('Please ensure you have uploaded the files before pressing the Submit button', icon="🚨")
+
+
+
+ if st.button("Clear All"):
+     # Clear all in-memory and on-disk data caches
+     # (i.e. drop the cached predictions and CSV conversions)
+     st.cache_data.clear()
+
+
+
+ st.caption('Developed By: Shashank Yadav : shashank[at]arizona.edu', unsafe_allow_html=True)
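
Note: the twelve dataset/task branches in predict_on_batch_output above differ only in the model path and the number of input arrays. A minimal table-driven sketch of the same selection is shown here for reference; it is not part of this commit, and the task codes and file names simply mirror what app.py and the uploaded model files already use.

import numpy as np
from tensorflow.keras.models import load_model

# Sketch only: same directory layout as this commit, models/<dataset>/<file>.hdf5
MODEL_FILES = {
    'abp':  'bestmodel_alphabetapeptide.hdf5',
    'abpm': 'bestmodel_alphabetapeptidemhc.hdf5',
    'ap':   'bestmodel_alphapeptide.hdf5',
    'apm':  'bestmodel_alphapeptidemhc.hdf5',
    'bp':   'bestmodel_betapeptide.hdf5',
    'bpm':  'bestmodel_betapeptidemhc.hdf5',
}

def predict_on_batch_output(dataset, shorttask, group):
    dataset = dataset.lower()                     # 'MCPAS' -> 'mcpas', 'VDJDB' -> 'vdjdb'
    filename = MODEL_FILES[shorttask]
    if dataset == 'mcpas' and shorttask == 'abpm':
        # the mcpas checkpoint in this commit is spelled "...ptptidemhc..."
        filename = 'bestmodel_alphabetaptptidemhc.hdf5'
    arrays = [np.load(f) for f in group]          # one embedding array per uploaded .npy file
    model = load_model(f'models/{dataset}/{filename}', compile=False)
    return np.squeeze(model.predict_on_batch(arrays))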
models/mcpas/bestmodel_alphabetapeptide.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e82a6ce84f760362b5502769d23d6af6e1955e3425faf36ebde58d61466175c9
+ size 9945464
models/mcpas/bestmodel_alphabetaptptidemhc.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06010fffacf7feebae8a7109742795b0914d6c5007960abe215a92d07b937f67
+ size 11343872
models/mcpas/bestmodel_alphapeptide.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:99fb0db64587a7abf7e85068c27ae0702cca524da0fc49be925ef547c52e86da
+ size 7172184
models/mcpas/bestmodel_alphapeptidemhc.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94a58dc69a724f62de72098409e63b0e9cd09ac1b1f3bb2a317067a64dc69b10
+ size 8566304
models/mcpas/bestmodel_betapeptide.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6a470ea165a9fa9a02f6fb493dfe3b6f4332667042de0c5b9ce3020a1d56e4a
+ size 7172184
models/mcpas/bestmodel_betapeptidemhc.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:310e7c9a59408d0befaa46c7a9942c5642ab42f65c06b5de93b6755ae426965b
+ size 8566304
models/vdjdb/bestmodel_alphabetapeptide.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51dd068f6205ef33e67f75ba8ac26da064367b7b4de0baa178090d7b5e252cb6
+ size 9945464
models/vdjdb/bestmodel_alphabetapeptidemhc.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:51eed8fd1e39f13db3bc3bcca7194c4d450aab603b6f35088fb9c9be868c07a8
+ size 18827264
models/vdjdb/bestmodel_alphapeptide.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f77bcfa0c11e60821b45aa859593cd8352a4d566b867f328c4ad00b83ff027f
+ size 7172184
models/vdjdb/bestmodel_alphapeptidemhc.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9ea40fa3d12ceb411c75df88ecef05368e87b6485519ef876bd05de6dc7dee8
+ size 8566304
models/vdjdb/bestmodel_betapeptide.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ba2ac4e3c602cfbe1774e5e6fb20c87bf01996e0d534da7e912d922e429a269
+ size 7172184
models/vdjdb/bestmodel_betapeptidemhc.hdf5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b112ad593e7693e6831ec5c6bea5039e60db38aff8bc56a2e9f2f0238857e29
+ size 8566304
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ numpy
+ pandas
+ tensorflow
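
Note: app.py also imports streamlit (time is in the standard library). A Hugging Face Space built with the Streamlit SDK typically supplies streamlit itself, but for a local run a requirements list along these lines would be needed; the streamlit entry here is an addition for local use, not part of this commit:

numpy
pandas
tensorflow
streamlit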
sample_input_data.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2e35aa3f3fe1c15a7e595c642444b1c3433679509ef6271f3ac44e15a083485
+ size 611761
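
sample_input_data.zip is itself a Git LFS pointer, so the real archive has to be fetched (for example with git lfs pull) or taken from the app's "Download Sample Input Data" button before it can be used. A minimal sketch for sanity-checking an extracted embedding file before uploading it to the app follows; the filename alpha.npy is hypothetical, so substitute whatever .npy files the archive actually contains:

import numpy as np

emb = np.load('alpha.npy')     # hypothetical name; any .npy extracted from sample_input_data.zip
print(emb.shape, emb.dtype)    # app.py feeds this array straight to model.predict_on_batch,
                               # so rows should correspond to the input TCR/peptide/MHC sequences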