|
import streamlit as st |
|
import pandas as pd |
|
from PIL import Image |
|
import subprocess |
|
import os |
|
import base64 |
|
import pickle |
|
|
|
|
|
def desc_calc():
    """Compute PaDEL fingerprint descriptors for the molecules in the working directory.

    Launches the PaDEL-Descriptor jar under ``./PaDEL-Descriptor`` with the
    ``ExtendedFingerprinter.xml`` descriptor selection, scanning ``./`` for
    input and writing the result to ``descriptors_output.csv``. The temporary
    ``molecule.smi`` input file is deleted afterwards, even when the Java
    process could not be started.

    Raises:
        OSError: If the ``java`` executable cannot be launched.
    """
    # An explicit argument list avoids any dependence on how a command string
    # would be split and never involves a shell.
    command = [
        "java",
        "-Xms2G",
        "-Xmx2G",
        "-Djava.awt.headless=true",
        "-jar",
        "./PaDEL-Descriptor/PaDEL-Descriptor.jar",
        "-removesalt",
        "-standardizenitro",
        "-fingerprints",
        "-descriptortypes",
        "./PaDEL-Descriptor/ExtendedFingerprinter.xml",
        "-dir",
        "./",
        "-file",
        "descriptors_output.csv",
    ]
    try:
        # stdout is captured and discarded; the exit status is intentionally
        # not checked, matching the previous behaviour.
        subprocess.run(command, stdout=subprocess.PIPE)
    finally:
        # Always clean up the temporary input so a failed launch does not
        # leave a stale file for the next run to pick up.
        try:
            os.remove('molecule.smi')
        except FileNotFoundError:
            # Nothing to clean up; do not mask an error raised above.
            pass
|
|
|
|
|
def filedownload(df):
    """Build an HTML download link that embeds *df* as a base64-encoded CSV.

    Args:
        df: DataFrame to serialise; it is written without the index column.

    Returns:
        An ``<a>`` tag (as a string) whose ``href`` is a ``data:`` URI holding
        the CSV and whose ``download`` attribute names the file
        ``prediction.csv``.
    """
    csv_text = df.to_csv(index=False)
    # base64 works on bytes, so encode the text first and decode the result
    # back to str for embedding in the markup.
    payload = base64.b64encode(csv_text.encode()).decode()
    # "text/csv" is the registered media type for CSV; "file/csv" is not one.
    return (
        f'<a href="data:text/csv;base64,{payload}" '
        f'download="prediction.csv">Download Predictions</a>'
    )
|
|
|
|
|
def build_model(input_data):
    """Predict hERG activity for *input_data* and render the results.

    Loads the pickled model from ``hERG_model.pkl``, runs ``predict`` on the
    given descriptors, displays the predictions next to the molecule names
    and adds a CSV download link for the resulting table.

    Args:
        input_data: Descriptor table passed unchanged to the model's
            ``predict`` method.

    Note:
        Molecule names are read from column ``1`` of the module-level
        ``load_data`` frame, so that name must be assigned before this
        function is called.
    """
    # NOTE(security): unpickling can execute arbitrary code; the model file
    # must come from a trusted source.
    # The context manager closes the file handle, which a bare open() inside
    # pickle.load() would leave open.
    with open('hERG_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

    prediction = model.predict(input_data)
    st.header('**Prediction output**')

    prediction_output = pd.Series(prediction, name='hERG Activity')
    molecule_name = pd.Series(load_data[1], name='Molecule Name')
    # Side-by-side table: one row per molecule, name then predicted activity.
    df = pd.concat([molecule_name, prediction_output], axis=1)

    st.write(df)
    st.markdown(filedownload(df), unsafe_allow_html=True)
|
|
|
|
|
# Logo banner shown at the top of the page.
image = Image.open('logo.png')
st.image(image, use_column_width=True)

# Introductory text: page title, the IC50 criteria and the credits.
_intro_markdown = """
# hERG Cardiotoxicity Prediction

Criteria we used

IC50 > 30 uM No
IC50 < 1 uM Yes


This web app allows you to predict the hERG cardiotoxicity of your chosen compounds.

**Credits**
- App built in `Python` + `Streamlit`
- Descriptor calculated using [PaDEL-Descriptor](http://www.yapcwsoft.com/dd/padeldescriptor/) [[Read the Paper]](https://doi.org/10.1002/jcc.21707).
---
"""
st.markdown(_intro_markdown)
|
|
|
|
|
# Sidebar: file upload widget plus a link to an example input file.
with st.sidebar.header('1. Upload your CSV data'):
    uploaded_file = st.sidebar.file_uploader("Upload your input file", type=['txt'])
    st.sidebar.markdown("""
[Example input file](https://raw.githubusercontent.com/dataprofessor/bioactivity-prediction-app/main/example_acetylcholinesterase.txt)
""")

if not st.sidebar.button('Predict'):
    st.info('Upload input data in the sidebar to start!')
elif uploaded_file is None:
    # The uploader yields None until a file is chosen; passing that to
    # pd.read_table raises, so report the problem to the user instead.
    st.error('No input file uploaded. Please upload a file in the sidebar, then click Predict.')
else:
    # Input is space-separated with no header row; build_model() reads the
    # molecule names from column 1 of this module-level frame.
    load_data = pd.read_table(uploaded_file, sep=' ', header=None)
    # Written as tab-separated molecule.smi; desc_calc() deletes it afterwards.
    load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)

    st.header('**Original input data**')
    st.write(load_data)

    with st.spinner("Calculating descriptors..."):
        desc_calc()

    # Show the full descriptor table written by desc_calc().
    st.header('**Calculated molecular descriptors**')
    desc = pd.read_csv('descriptors_output.csv')
    st.write(desc)
    st.write(desc.shape)

    # Keep only the descriptor columns named in the header row of
    # descriptors_list.csv before handing the table to the model.
    st.header('**Subset of descriptors from previously built models**')
    Xlist = list(pd.read_csv('descriptors_list.csv').columns)
    desc_subset = desc[Xlist]
    st.write(desc_subset)
    st.write(desc_subset.shape)

    # Run the saved model on the selected descriptors and render the output.
    build_model(desc_subset)
|
|