shaktidharreddy08 committed on
Commit
8c7ba08
1 Parent(s): 3fde8ff

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from PIL import Image
4
+ import subprocess
5
+ import os
6
+ import base64
7
+ import pickle
8
+
9
# Molecular descriptor calculator
def desc_calc():
    """Run the PaDEL-Descriptor jar and delete the temporary ``molecule.smi``.

    The Java command is pointed at the current directory (``-dir ./``) and
    told to write its result to ``descriptors_output.csv``. The exit status
    of the Java process is not inspected; callers find out about a failed
    run only when they try to read the output file.

    ``molecule.smi`` is removed afterwards even if the Java process could
    not be started.

    Returns:
        None.
    """
    # Performs the descriptor calculation
    bashCommand = "java -Xms2G -Xmx2G -Djava.awt.headless=true -jar ./PaDEL-Descriptor/PaDEL-Descriptor.jar -removesalt -standardizenitro -fingerprints -descriptortypes ./PaDEL-Descriptor/PubchemFingerprinter.xml -dir ./ -file descriptors_output.csv"
    try:
        # The command is passed as an argument list, so it must NOT go through
        # a shell: with shell=True on POSIX only the first list item ("java")
        # is executed and the remaining items become arguments of the shell
        # itself, i.e. PaDEL would never receive its options.
        subprocess.run(bashCommand.split(), stdout=subprocess.PIPE)
    finally:
        # Always clean up the temporary input written by the caller.
        os.remove('molecule.smi')
16
+
17
# File download
def filedownload(df):
    """Return an HTML anchor that downloads *df* as ``prediction.csv``.

    The frame is serialised to CSV without its index, base64-encoded and
    embedded in a ``data:`` URI so no file has to be written on the server.

    Args:
        df: Object exposing ``to_csv(index=False)`` that returns a string.

    Returns:
        The ``<a>`` element as a string.
    """
    csv_bytes = df.to_csv(index=False).encode()
    # b64encode works on bytes; decode back to str so it can be put in HTML.
    payload = base64.b64encode(csv_bytes).decode()
    return f'<a href="data:file/csv;base64,{payload}" download="prediction.csv">Download Predictions</a>'
23
+
24
# Model building
def build_model(input_data):
    """Apply the saved model to *input_data* and render the predictions.

    Loads ``acetylcholinesterase_model.pkl`` from the working directory,
    calls its ``predict`` method, and writes a header, a two-column table
    (``molecule_name``, ``pIC50``) and a CSV download link to the Streamlit
    page.

    Args:
        input_data: Feature table handed unchanged to ``predict``.

    Returns:
        None.

    Note:
        Molecule names are taken from column ``1`` of the module-level
        ``load_data`` frame, which must have been assigned before this
        function is called.
    """
    # Reads in saved regression model; the context manager closes the file
    # handle that a bare ``pickle.load(open(...))`` would leave open.
    with open('acetylcholinesterase_model.pkl', 'rb') as model_file:
        load_model = pickle.load(model_file)
    # Apply model to make predictions
    prediction = load_model.predict(input_data)
    st.header('**Prediction output**')
    prediction_output = pd.Series(prediction, name='pIC50')
    molecule_name = pd.Series(load_data[1], name='molecule_name')
    df = pd.concat([molecule_name, prediction_output], axis=1)
    st.write(df)
    st.markdown(filedownload(df), unsafe_allow_html=True)
36
+
37
# Logo image shown at the top of the page
image = Image.open('logo.png')
st.image(image, use_column_width=True)

# Page title, short description and credits (rendered as Markdown)
PAGE_INTRO = """
# Better Than a Placebo - Optum Hackathon 2022

This app allows you to predict the bioactivity towards inhibting the `Acetylcholinesterase` enzyme. `Acetylcholinesterase` is a drug target for Alzheimer's disease.

**Credits**
- App built in `Python` + `Streamlit` by Better Than a Placebo team - Optum Hackathon 2022
- Descriptor calculated using [PaDEL-Descriptor](http://www.yapcwsoft.com/dd/padeldescriptor/) [[Read the Paper]](https://doi.org/10.1002/jcc.21707).
---
"""
st.markdown(PAGE_INTRO)
53
+
54
# Sidebar: upload widget plus a link to a sample input file
EXAMPLE_INPUT_LINK = """
[Example input file](https://raw.githubusercontent.com/dataprofessor/bioactivity-prediction-app/main/example_acetylcholinesterase.txt)
"""

with st.sidebar.header('1. Upload your CSV data'):
    # ``uploaded_file`` is read later by the Predict branch.
    uploaded_file = st.sidebar.file_uploader(
        "Upload your input file",
        type=['txt'],
    )
    st.sidebar.markdown(EXAMPLE_INPUT_LINK)
60
+
61
# Main flow: runs when the sidebar "Predict" button is pressed.
if st.sidebar.button('Predict'):
    if uploaded_file is None:
        # Predict was pressed before a file was chosen; reading ``None``
        # with pandas would raise, so tell the user what is missing instead.
        st.warning('Please upload an input file in the sidebar before clicking Predict.')
    else:
        # The upload is parsed as space-separated text without a header row
        # and re-written tab-separated as ``molecule.smi`` for desc_calc().
        # ``load_data`` stays module-level because build_model() reads it.
        load_data = pd.read_table(uploaded_file, sep=' ', header=None)
        load_data.to_csv('molecule.smi', sep='\t', header=False, index=False)

        st.header('**Original input data**')
        st.write(load_data)

        with st.spinner("Calculating descriptors..."):
            desc_calc()

        # Read in calculated descriptors and display the dataframe
        st.header('**Calculated molecular descriptors**')
        desc = pd.read_csv('descriptors_output.csv')
        st.write(desc)
        st.write(desc.shape)

        # Read descriptor list used in previously built model; only the
        # column names of descriptor_list.csv are used.
        st.header('**Subset of descriptors from previously built models**')
        Xlist = list(pd.read_csv('descriptor_list.csv').columns)
        desc_subset = desc[Xlist]
        st.write(desc_subset)
        st.write(desc_subset.shape)

        # Apply trained model to make prediction on query compounds
        build_model(desc_subset)
else:
    st.info('Upload input data in the sidebar to start!')