BalayogiG commited on
Commit
0b84c0f
1 Parent(s): ec2fffb

Added model

Browse files
Files changed (5) hide show
  1. Ab-PepC_logo.png +0 -0
  2. amino_acid_composition.py +45 -0
  3. app.py +224 -0
  4. model.pkl +3 -0
  5. requirements.txt +6 -0
Ab-PepC_logo.png ADDED
amino_acid_composition.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """amino_acid_composition.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1YebtHJU3a9oNapMztiEku0M2VToI_1Lm
8
+ """
9
+
10
+ # amino_acid_composition.py
11
+
12
def amino_acid_composition(sequence):
    """Return the percentage of each standard amino acid in ``sequence``.

    Args:
        sequence: Peptide sequence as a string of one-letter residue codes.
            Matching is case-sensitive; only the 20 upper-case standard
            codes are counted.

    Returns:
        A dict mapping each of the 20 standard residue codes to its share
        of the sequence, expressed as a percentage (0-100). The denominator
        is the full sequence length, so characters outside the standard
        alphabet lower the percentages of the others rather than being
        ignored. An empty sequence yields 0.0 for every residue.
    """
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
    total = len(sequence)

    # Guard against division by zero: an empty sequence has no composition.
    if total == 0:
        return {aa: 0.0 for aa in amino_acids}

    composition = {aa: 0 for aa in amino_acids}
    for aa in sequence:
        if aa in composition:
            composition[aa] += 1

    return {aa: (count / total) * 100 for aa, count in composition.items()}
25
+
26
def process_dataset(dataset):
    """Compute the amino-acid composition of every sequence in ``dataset``.

    Args:
        dataset: Iterable of peptide sequences.

    Returns:
        A list with one composition dict per input sequence, in input order.
    """
    return [amino_acid_composition(peptide) for peptide in dataset]
31
+
32
def main(active_peptides, inactive_peptides):
    """Compute compositions for the active and inactive peptide sets.

    Args:
        active_peptides: Iterable of sequences labelled active.
        inactive_peptides: Iterable of sequences labelled inactive.

    Returns:
        A 2-tuple ``(active_compositions, inactive_compositions)``, each a
        list of per-sequence composition dicts.
    """
    # Active set is processed first, then inactive, as a single tuple result.
    return process_dataset(active_peptides), process_dataset(inactive_peptides)
36
+
37
if __name__ == "__main__":
    # Demo run on small hard-coded peptide lists.
    active_peptides = ["ACDEFGHIKLMNPQRSTVWY", "ACDEFGHIKLMN"]
    inactive_peptides = ["QRSTVWYACDEFGHIKLMN", "HIKLMNPQRST"]

    active_compositions, inactive_compositions = main(active_peptides, inactive_peptides)

    # Print the two result sets, active first.
    for label, result in (
        ("Active Peptide Compositions:", active_compositions),
        ("Inactive Peptide Compositions:", inactive_compositions),
    ):
        print(label, result)
45
+
app.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import streamlit as st
2
+ # import pandas as pd
3
+ # import joblib
4
+ # from sklearn.ensemble import RandomForestClassifier
5
+ # import matplotlib.pyplot as plt
6
+ # import seaborn as sns
7
+
8
+ # # Load the trained model (ensure the model file is in the same directory)
9
+ # model = joblib.load('model.pkl')
10
+
11
+ # # Function to process new peptide sequences
12
+ # def process_peptide_sequences(peptides):
13
+ # # Example processing function, replace with actual preprocessing steps
14
+ # compositions = []
15
+ # for peptide in peptides:
16
+ # composition = {aa: peptide.count(aa) for aa in 'ACDEFGHIKLMNPQRSTVWY'}
17
+ # compositions.append(composition)
18
+ # return pd.DataFrame(compositions)
19
+
20
+ # # Streamlit app
21
+ # st.title("ABPep-C")
22
+ # st.write("Classify peptide sequences as active or inactive against biofilm")
23
+
24
+ # # Input: Peptide sequences
25
+ # peptide_input = st.text_area("Enter peptide sequences (one per line)")
26
+ # peptides = peptide_input.split('\n')
27
+
28
+ # if st.button("Classify"):
29
+ # if peptides:
30
+ # # Process the input peptides
31
+ # peptide_df = process_peptide_sequences(peptides)
32
+
33
+ # # Predict using the trained model
34
+ # predictions = model.predict(peptide_df)
35
+ # results = pd.DataFrame({
36
+ # 'Peptide': peptides,
37
+ # 'Prediction': predictions
38
+ # })
39
+ # results['Prediction'] = results['Prediction'].map({0: 'Inactive', 1: 'Active'})
40
+
41
+ # # Display the results
42
+ # st.write("Classification Results")
43
+ # st.write(results)
44
+
45
+ # # Display interactive graphs
46
+ # st.write("Prediction Distribution")
47
+ # fig, ax = plt.subplots()
48
+ # sns.countplot(x='Prediction', data=results, ax=ax)
49
+ # st.pyplot(fig)
50
+
51
+ # st.write("Amino Acid Composition of Peptides")
52
+ # amino_acid_counts = peptide_df.sum().reset_index()
53
+ # amino_acid_counts.columns = ['Amino Acid', 'Count']
54
+ # fig, ax = plt.subplots()
55
+ # sns.barplot(x='Amino Acid', y='Count', data=amino_acid_counts, ax=ax)
56
+ # st.pyplot(fig)
57
+ # else:
58
+ # st.write("Please enter peptide sequences.")
59
+
60
+ # # Save this script as app.py and run it using: streamlit run app.py
61
+ #######################################################################################################################################
62
+ # import streamlit as st
63
+ # import pandas as pd
64
+ # import joblib
65
+ # from sklearn.ensemble import RandomForestClassifier
66
+ # import matplotlib.pyplot as plt
67
+ # import seaborn as sns
68
+
69
+ # # Load the trained model (ensure the model file is in the same directory)
70
+ # model = joblib.load('model.pkl')
71
+
72
+ # # Function to process new peptide sequences
73
+ # def process_peptide_sequences(peptides):
74
+ # # Example processing function, replace with actual preprocessing steps
75
+ # compositions = []
76
+ # for peptide in peptides:
77
+ # composition = {aa: peptide.count(aa) for aa in 'ACDEFGHIKLMNPQRSTVWY'}
78
+ # compositions.append(composition)
79
+ # return pd.DataFrame(compositions)
80
+
81
+ # # Custom CSS for font size and color
82
+ # st.markdown("""
83
+ # <style>
84
+ # .title {
85
+ # font-size: 48px !important;
86
+ # color: #4CAF50;
87
+ # }
88
+ # .subheader {
89
+ # font-size: 24px !important;
90
+ # color: #FF5722;
91
+ # }
92
+ # .text {
93
+ # font-size: 18px !important;
94
+ # }
95
+ # </style>
96
+ # """, unsafe_allow_html=True)
97
+
98
+ # # Streamlit app
99
+ # st.markdown('<h1 class="title">Ab-PepC</h1>', unsafe_allow_html=True)
100
+ # st.markdown('<h2 class="subheader">Classify peptide sequences as active or inactive against biofilm</h2>', unsafe_allow_html=True)
101
+
102
+ # # Input: Peptide sequences
103
+ # peptide_input = st.text_area("Enter peptide sequences (one per line)")
104
+ # peptides = peptide_input.split('\n')
105
+
106
+ # if st.button("Classify"):
107
+ # if peptides:
108
+ # # Process the input peptides
109
+ # peptide_df = process_peptide_sequences(peptides)
110
+
111
+ # # Predict using the trained model
112
+ # predictions = model.predict(peptide_df)
113
+ # results = pd.DataFrame({
114
+ # 'Peptide': peptides,
115
+ # 'Prediction': predictions
116
+ # })
117
+ # results['Prediction'] = results['Prediction'].map({0: 'Inactive', 1: 'Active'})
118
+
119
+ # # Display the results
120
+ # st.markdown('<h3 class="subheader">Classification Results</h3>', unsafe_allow_html=True)
121
+ # st.dataframe(results)
122
+
123
+ # # Display interactive graphs
124
+ # st.markdown('<h3 class="subheader">Prediction Distribution</h3>', unsafe_allow_html=True)
125
+ # fig, ax = plt.subplots()
126
+ # sns.countplot(x='Prediction', data=results, ax=ax)
127
+ # ax.set_xlabel('Prediction', fontsize=18)
128
+ # ax.set_ylabel('Count', fontsize=18)
129
+ # st.pyplot(fig)
130
+
131
+ # st.markdown('<h3 class="subheader">Amino Acid Composition of Peptides</h3>', unsafe_allow_html=True)
132
+ # amino_acid_counts = peptide_df.sum().reset_index()
133
+ # amino_acid_counts.columns = ['Amino Acid', 'Count']
134
+ # fig, ax = plt.subplots()
135
+ # sns.barplot(x='Amino Acid', y='Count', data=amino_acid_counts, ax=ax)
136
+ # ax.set_xlabel('Amino Acid', fontsize=18)
137
+ # ax.set_ylabel('Count', fontsize=18)
138
+ # st.pyplot(fig)
139
+ # else:
140
+ # st.write("Please enter peptide sequences.")
141
+ #######################################################################################################################################
142
+ import streamlit as st
143
+ import pandas as pd
144
+ import joblib
145
+ from sklearn.ensemble import RandomForestClassifier
146
+ import matplotlib.pyplot as plt
147
+ import seaborn as sns
148
+
149
# Load the trained model (ensure the model file is in the same directory).
# Streamlit re-executes this script on every widget interaction, so the load
# is wrapped in a cached resource to read and unpickle the file only once per
# server process instead of on every rerun.
@st.cache_resource
def _load_model():
    """Load and return the trained classifier stored in ``model.pkl``.

    The path is relative to the current working directory.
    """
    # NOTE(review): joblib.load unpickles arbitrary objects; only ship a
    # model.pkl that comes from a trusted source.
    return joblib.load('model.pkl')


model = _load_model()
151
+
152
+ # Function to process new peptide sequences
153
def process_peptide_sequences(peptides):
    """Build a table of raw amino-acid counts, one row per peptide.

    Args:
        peptides: Iterable of peptide sequence strings.

    Returns:
        A ``pandas.DataFrame`` with one row per peptide and one column per
        standard residue code, holding how many times that (upper-case)
        code occurs in the peptide.
    """
    # Example processing function, replace with actual preprocessing steps
    residues = 'ACDEFGHIKLMNPQRSTVWY'
    rows = [
        {aa: peptide.count(aa) for aa in residues}
        for peptide in peptides
    ]
    return pd.DataFrame(rows)
160
+
161
# Stylesheet injected into the page: sizes and colours for the title,
# sub-headers and body text classes used by the HTML snippets below.
_CUSTOM_CSS = """
    <style>
    .title {
        font-size: 48px !important;
        color: #4CAF50;
    }
    .subheader {
        font-size: 24px !important;
        color: #FF5722;
    }
    .text {
        font-size: 18px !important;
    }
    </style>
    """
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Page header: logo in a narrow left column, title and tagline on the right.
col1, col2 = st.columns([1, 4])  # Adjust the width ratio as needed
with col1:
    st.image('Ab-PepC_logo.png', width=150)  # Logo file path
with col2:
    st.markdown('<h1 class="title">ABPep-C</h1>', unsafe_allow_html=True)
    st.markdown('<h2 class="subheader">Classify peptide sequences as active or inactive against biofilm</h2>', unsafe_allow_html=True)
184
+
185
# Input: peptide sequences, one per line.
peptide_input = st.text_area("Enter peptide sequences (one per line)")
# splitlines() copes with both "\n" and "\r\n"; blank or whitespace-only lines
# are dropped. Plain split('\n') would turn empty input into [''], which is
# truthy and would be classified as if it were a peptide.
peptides = [line.strip() for line in peptide_input.splitlines() if line.strip()]

if st.button("Classify"):
    if peptides:
        # Turn the input peptides into the feature table the model is fed.
        peptide_df = process_peptide_sequences(peptides)

        # Predict using the trained model and label the numeric classes.
        predictions = model.predict(peptide_df)
        results = pd.DataFrame({
            'Peptide': peptides,
            'Prediction': predictions
        })
        results['Prediction'] = results['Prediction'].map({0: 'Inactive', 1: 'Active'})

        # Display the results table.
        st.markdown('<h3 class="subheader">Classification Results</h3>', unsafe_allow_html=True)
        st.dataframe(results)

        # Bar chart: number of peptides per predicted class.
        st.markdown('<h3 class="subheader">Prediction Distribution</h3>', unsafe_allow_html=True)
        fig, ax = plt.subplots()
        sns.countplot(x='Prediction', data=results, ax=ax)
        ax.set_xlabel('Prediction', fontsize=18)
        ax.set_ylabel('Count', fontsize=18)
        st.pyplot(fig)
        # Release the figure; otherwise pyplot keeps it alive across reruns.
        plt.close(fig)

        # Bar chart: residue counts summed over all submitted peptides.
        st.markdown('<h3 class="subheader">Amino Acid Composition of Peptides</h3>', unsafe_allow_html=True)
        amino_acid_counts = peptide_df.sum().reset_index()
        amino_acid_counts.columns = ['Amino Acid', 'Count']
        fig, ax = plt.subplots()
        sns.barplot(x='Amino Acid', y='Count', data=amino_acid_counts, ax=ax)
        ax.set_xlabel('Amino Acid', fontsize=18)
        ax.set_ylabel('Count', fontsize=18)
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.write("Please enter peptide sequences.")
224
+
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9458d55fb443d797241263126170928459672aad1a93864939bbe36092c8394d
3
+ size 309433
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ joblib
4
+ scikit-learn
5
+ matplotlib
6
+ seaborn