Added model
Browse files- Ab-PepC_logo.png +0 -0
- amino_acid_composition.py +45 -0
- app.py +224 -0
- model.pkl +3 -0
- requirements.txt +6 -0
Ab-PepC_logo.png
ADDED
amino_acid_composition.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""amino_acid_composition.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1YebtHJU3a9oNapMztiEku0M2VToI_1Lm
|
8 |
+
"""
|
9 |
+
|
10 |
+
# amino_acid_composition.py
|
11 |
+
|
12 |
+
def amino_acid_composition(sequence):
    """Return the percentage of each standard amino acid in a peptide.

    Args:
        sequence: Peptide given as one-letter residue codes. Matching is
            case-sensitive, so only uppercase codes are counted.

    Returns:
        A dict keyed by the 20 residues of 'ACDEFGHIKLMNPQRSTVWY' (in that
        order) whose values are percentages of the sequence length.
        Characters outside that alphabet are not counted but still
        contribute to the length, so the values may sum to less than 100.
        An empty sequence yields 0.0 for every residue.
    """
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
    total = len(sequence)

    # An empty sequence has no composition; return zeros instead of
    # dividing by zero below.
    if total == 0:
        return {aa: 0.0 for aa in amino_acids}

    composition = {aa: 0 for aa in amino_acids}
    for aa in sequence:
        if aa in composition:
            composition[aa] += 1

    # Convert raw counts to percentages of the full sequence length.
    for aa in composition:
        composition[aa] = (composition[aa] / total) * 100

    return composition
|
25 |
+
|
26 |
+
def process_dataset(dataset):
    """Compute the amino acid composition of every sequence in *dataset*.

    Args:
        dataset: Iterable of peptide sequences.

    Returns:
        A list with one composition dict per sequence, in input order.
    """
    return [amino_acid_composition(peptide) for peptide in dataset]
|
31 |
+
|
32 |
+
def main(active_peptides, inactive_peptides):
    """Compute compositions for the active and inactive peptide sets.

    Args:
        active_peptides: Iterable of sequences labelled active.
        inactive_peptides: Iterable of sequences labelled inactive.

    Returns:
        A 2-tuple ``(active_compositions, inactive_compositions)``, each a
        list of composition dicts as produced by ``process_dataset``.
    """
    # Tuple elements are evaluated left to right, so the active set is
    # processed first.
    return process_dataset(active_peptides), process_dataset(inactive_peptides)
|
36 |
+
|
37 |
+
if __name__ == "__main__":
    # Demonstration run on a few hard-coded example peptides.
    active_peptides = [
        "ACDEFGHIKLMNPQRSTVWY",
        "ACDEFGHIKLMN",
    ]
    inactive_peptides = [
        "QRSTVWYACDEFGHIKLMN",
        "HIKLMNPQRST",
    ]

    active_compositions, inactive_compositions = main(
        active_peptides,
        inactive_peptides,
    )

    print("Active Peptide Compositions:", active_compositions)
    print("Inactive Peptide Compositions:", inactive_compositions)
|
45 |
+
|
app.py
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import streamlit as st
|
2 |
+
# import pandas as pd
|
3 |
+
# import joblib
|
4 |
+
# from sklearn.ensemble import RandomForestClassifier
|
5 |
+
# import matplotlib.pyplot as plt
|
6 |
+
# import seaborn as sns
|
7 |
+
|
8 |
+
# # Load the trained model (ensure the model file is in the same directory)
|
9 |
+
# model = joblib.load('model.pkl')
|
10 |
+
|
11 |
+
# # Function to process new peptide sequences
|
12 |
+
# def process_peptide_sequences(peptides):
|
13 |
+
# # Example processing function, replace with actual preprocessing steps
|
14 |
+
# compositions = []
|
15 |
+
# for peptide in peptides:
|
16 |
+
# composition = {aa: peptide.count(aa) for aa in 'ACDEFGHIKLMNPQRSTVWY'}
|
17 |
+
# compositions.append(composition)
|
18 |
+
# return pd.DataFrame(compositions)
|
19 |
+
|
20 |
+
# # Streamlit app
|
21 |
+
# st.title("ABPep-C")
|
22 |
+
# st.write("Classify peptide sequences as active or inactive against biofilm")
|
23 |
+
|
24 |
+
# # Input: Peptide sequences
|
25 |
+
# peptide_input = st.text_area("Enter peptide sequences (one per line)")
|
26 |
+
# peptides = peptide_input.split('\n')
|
27 |
+
|
28 |
+
# if st.button("Classify"):
|
29 |
+
# if peptides:
|
30 |
+
# # Process the input peptides
|
31 |
+
# peptide_df = process_peptide_sequences(peptides)
|
32 |
+
|
33 |
+
# # Predict using the trained model
|
34 |
+
# predictions = model.predict(peptide_df)
|
35 |
+
# results = pd.DataFrame({
|
36 |
+
# 'Peptide': peptides,
|
37 |
+
# 'Prediction': predictions
|
38 |
+
# })
|
39 |
+
# results['Prediction'] = results['Prediction'].map({0: 'Inactive', 1: 'Active'})
|
40 |
+
|
41 |
+
# # Display the results
|
42 |
+
# st.write("Classification Results")
|
43 |
+
# st.write(results)
|
44 |
+
|
45 |
+
# # Display interactive graphs
|
46 |
+
# st.write("Prediction Distribution")
|
47 |
+
# fig, ax = plt.subplots()
|
48 |
+
# sns.countplot(x='Prediction', data=results, ax=ax)
|
49 |
+
# st.pyplot(fig)
|
50 |
+
|
51 |
+
# st.write("Amino Acid Composition of Peptides")
|
52 |
+
# amino_acid_counts = peptide_df.sum().reset_index()
|
53 |
+
# amino_acid_counts.columns = ['Amino Acid', 'Count']
|
54 |
+
# fig, ax = plt.subplots()
|
55 |
+
# sns.barplot(x='Amino Acid', y='Count', data=amino_acid_counts, ax=ax)
|
56 |
+
# st.pyplot(fig)
|
57 |
+
# else:
|
58 |
+
# st.write("Please enter peptide sequences.")
|
59 |
+
|
60 |
+
# # Save this script as app.py and run it using: streamlit run app.py
|
61 |
+
#######################################################################################################################################
|
62 |
+
# import streamlit as st
|
63 |
+
# import pandas as pd
|
64 |
+
# import joblib
|
65 |
+
# from sklearn.ensemble import RandomForestClassifier
|
66 |
+
# import matplotlib.pyplot as plt
|
67 |
+
# import seaborn as sns
|
68 |
+
|
69 |
+
# # Load the trained model (ensure the model file is in the same directory)
|
70 |
+
# model = joblib.load('model.pkl')
|
71 |
+
|
72 |
+
# # Function to process new peptide sequences
|
73 |
+
# def process_peptide_sequences(peptides):
|
74 |
+
# # Example processing function, replace with actual preprocessing steps
|
75 |
+
# compositions = []
|
76 |
+
# for peptide in peptides:
|
77 |
+
# composition = {aa: peptide.count(aa) for aa in 'ACDEFGHIKLMNPQRSTVWY'}
|
78 |
+
# compositions.append(composition)
|
79 |
+
# return pd.DataFrame(compositions)
|
80 |
+
|
81 |
+
# # Custom CSS for font size and color
|
82 |
+
# st.markdown("""
|
83 |
+
# <style>
|
84 |
+
# .title {
|
85 |
+
# font-size: 48px !important;
|
86 |
+
# color: #4CAF50;
|
87 |
+
# }
|
88 |
+
# .subheader {
|
89 |
+
# font-size: 24px !important;
|
90 |
+
# color: #FF5722;
|
91 |
+
# }
|
92 |
+
# .text {
|
93 |
+
# font-size: 18px !important;
|
94 |
+
# }
|
95 |
+
# </style>
|
96 |
+
# """, unsafe_allow_html=True)
|
97 |
+
|
98 |
+
# # Streamlit app
|
99 |
+
# st.markdown('<h1 class="title">Ab-PepC</h1>', unsafe_allow_html=True)
|
100 |
+
# st.markdown('<h2 class="subheader">Classify peptide sequences as active or inactive against biofilm</h2>', unsafe_allow_html=True)
|
101 |
+
|
102 |
+
# # Input: Peptide sequences
|
103 |
+
# peptide_input = st.text_area("Enter peptide sequences (one per line)")
|
104 |
+
# peptides = peptide_input.split('\n')
|
105 |
+
|
106 |
+
# if st.button("Classify"):
|
107 |
+
# if peptides:
|
108 |
+
# # Process the input peptides
|
109 |
+
# peptide_df = process_peptide_sequences(peptides)
|
110 |
+
|
111 |
+
# # Predict using the trained model
|
112 |
+
# predictions = model.predict(peptide_df)
|
113 |
+
# results = pd.DataFrame({
|
114 |
+
# 'Peptide': peptides,
|
115 |
+
# 'Prediction': predictions
|
116 |
+
# })
|
117 |
+
# results['Prediction'] = results['Prediction'].map({0: 'Inactive', 1: 'Active'})
|
118 |
+
|
119 |
+
# # Display the results
|
120 |
+
# st.markdown('<h3 class="subheader">Classification Results</h3>', unsafe_allow_html=True)
|
121 |
+
# st.dataframe(results)
|
122 |
+
|
123 |
+
# # Display interactive graphs
|
124 |
+
# st.markdown('<h3 class="subheader">Prediction Distribution</h3>', unsafe_allow_html=True)
|
125 |
+
# fig, ax = plt.subplots()
|
126 |
+
# sns.countplot(x='Prediction', data=results, ax=ax)
|
127 |
+
# ax.set_xlabel('Prediction', fontsize=18)
|
128 |
+
# ax.set_ylabel('Count', fontsize=18)
|
129 |
+
# st.pyplot(fig)
|
130 |
+
|
131 |
+
# st.markdown('<h3 class="subheader">Amino Acid Composition of Peptides</h3>', unsafe_allow_html=True)
|
132 |
+
# amino_acid_counts = peptide_df.sum().reset_index()
|
133 |
+
# amino_acid_counts.columns = ['Amino Acid', 'Count']
|
134 |
+
# fig, ax = plt.subplots()
|
135 |
+
# sns.barplot(x='Amino Acid', y='Count', data=amino_acid_counts, ax=ax)
|
136 |
+
# ax.set_xlabel('Amino Acid', fontsize=18)
|
137 |
+
# ax.set_ylabel('Count', fontsize=18)
|
138 |
+
# st.pyplot(fig)
|
139 |
+
# else:
|
140 |
+
# st.write("Please enter peptide sequences.")
|
141 |
+
#######################################################################################################################################
|
142 |
+
import streamlit as st
|
143 |
+
import pandas as pd
|
144 |
+
import joblib
|
145 |
+
from sklearn.ensemble import RandomForestClassifier
|
146 |
+
import matplotlib.pyplot as plt
|
147 |
+
import seaborn as sns
|
148 |
+
|
149 |
+
# Load the trained model (ensure the model file is in the same directory)
|
150 |
+
# The path is relative, so it is resolved against the process's current
# working directory when the app starts.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only ship a model.pkl that comes from a trusted source.
model = joblib.load('model.pkl')
|
151 |
+
|
152 |
+
# Function to process new peptide sequences
|
153 |
+
def process_peptide_sequences(peptides):
    """Build a table of raw amino acid counts, one row per peptide.

    Args:
        peptides: Iterable of peptide sequences (one-letter residue codes).
            Counting is case-sensitive; only uppercase codes match.

    Returns:
        A pandas DataFrame with one row per peptide and one column per
        residue of 'ACDEFGHIKLMNPQRSTVWY', holding occurrence counts.
        An empty input produces an empty DataFrame.
    """
    # Example processing function, replace with actual preprocessing steps
    residues = 'ACDEFGHIKLMNPQRSTVWY'
    rows = [
        {residue: peptide.count(residue) for residue in residues}
        for peptide in peptides
    ]
    return pd.DataFrame(rows)
|
160 |
+
|
161 |
+
# Custom CSS for font size and color, injected once as raw HTML.
page_css = """
<style>
.title {
font-size: 48px !important;
color: #4CAF50;
}
.subheader {
font-size: 24px !important;
color: #FF5722;
}
.text {
font-size: 18px !important;
}
</style>
"""
st.markdown(page_css, unsafe_allow_html=True)

# Streamlit app header: logo in a narrow left column, title and tagline in
# a wider right column.
col1, col2 = st.columns([1, 4])  # Adjust the width ratio as needed
with col1:
    st.image('Ab-PepC_logo.png', width=150)  # Add your logo file path here
with col2:
    st.markdown('<h1 class="title">ABPep-C</h1>', unsafe_allow_html=True)
    st.markdown(
        '<h2 class="subheader">Classify peptide sequences as active or inactive against biofilm</h2>',
        unsafe_allow_html=True,
    )
|
184 |
+
|
185 |
+
# Input: Peptide sequences, one per line.
peptide_input = st.text_area("Enter peptide sequences (one per line)")

# Strip surrounding whitespace and drop blank lines. ''.split('\n') is
# [''], a truthy list, so without this filtering an empty text area would
# be classified as a single empty peptide and the "please enter" branch
# below could never run. splitlines() also copes with '\r\n' line endings.
peptides = [line.strip() for line in peptide_input.splitlines() if line.strip()]

if st.button("Classify"):
    if peptides:
        # Process the input peptides into the per-residue count table.
        peptide_df = process_peptide_sequences(peptides)

        # Predict using the trained model and label the numeric classes.
        predictions = model.predict(peptide_df)
        results = pd.DataFrame({
            'Peptide': peptides,
            'Prediction': predictions,
        })
        results['Prediction'] = results['Prediction'].map({0: 'Inactive', 1: 'Active'})

        # Display the results
        st.markdown('<h3 class="subheader">Classification Results</h3>', unsafe_allow_html=True)
        st.dataframe(results)

        # Display the number of peptides per predicted class.
        st.markdown('<h3 class="subheader">Prediction Distribution</h3>', unsafe_allow_html=True)
        fig, ax = plt.subplots()
        sns.countplot(x='Prediction', data=results, ax=ax)
        ax.set_xlabel('Prediction', fontsize=18)
        ax.set_ylabel('Count', fontsize=18)
        st.pyplot(fig)
        # pyplot keeps every figure it creates alive until closed; the
        # script reruns on each interaction, so release it explicitly.
        plt.close(fig)

        # Display residue counts summed over all submitted peptides.
        st.markdown('<h3 class="subheader">Amino Acid Composition of Peptides</h3>', unsafe_allow_html=True)
        amino_acid_counts = peptide_df.sum().reset_index()
        amino_acid_counts.columns = ['Amino Acid', 'Count']
        fig, ax = plt.subplots()
        sns.barplot(x='Amino Acid', y='Count', data=amino_acid_counts, ax=ax)
        ax.set_xlabel('Amino Acid', fontsize=18)
        ax.set_ylabel('Count', fontsize=18)
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.write("Please enter peptide sequences.")
|
224 |
+
|
model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9458d55fb443d797241263126170928459672aad1a93864939bbe36092c8394d
|
3 |
+
size 309433
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
joblib
|
4 |
+
scikit-learn
|
5 |
+
matplotlib
|
6 |
+
seaborn
|