ThorbenFroehlking committed on
Commit
3a463dd
·
1 Parent(s): 64f6421
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
.ipynb_checkpoints/2IWI-checkpoint.pdb ADDED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/4BDU-checkpoint.pdb ADDED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/4BDU_A_scored-checkpoint.pdb ADDED
The diff for this file is too large to render. See raw diff
 
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -1,6 +1,9 @@
1
  import gradio as gr
2
  import requests
3
- from Bio.PDB import PDBParser
 
 
 
4
  import numpy as np
5
  import os
6
  from gradio_molecule3d import Molecule3D
@@ -25,6 +28,8 @@ from datasets import Dataset
25
 
26
  from scipy.special import expit
27
 
 
 
28
  # Load model and move to device
29
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
30
  max_length = 1500
@@ -37,119 +42,250 @@ def normalize_scores(scores):
37
  min_score = np.min(scores)
38
  max_score = np.max(scores)
39
  return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
40
-
41
  def read_mol(pdb_path):
42
  """Read PDB file and return its content as a string"""
43
  with open(pdb_path, 'r') as f:
44
  return f.read()
45
 
46
- def fetch_pdb(pdb_id):
47
- pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
48
- pdb_path = f'{pdb_id}.pdb'
49
- response = requests.get(pdb_url)
50
- if response.status_code == 200:
51
- with open(pdb_path, 'wb') as f:
52
- f.write(response.content)
53
- return pdb_path
54
  else:
55
  return None
56
 
57
- def process_pdb(pdb_id, segment):
58
- pdb_path = fetch_pdb(pdb_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  if not pdb_path:
60
- return "Failed to fetch PDB file", None, None
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- parser = PDBParser(QUIET=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  structure = parser.get_structure('protein', pdb_path)
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  try:
66
  chain = structure[0][segment]
67
  except KeyError:
68
  return "Invalid Chain ID", None, None
69
 
70
-
71
- aa_dict = {
72
- 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
73
- 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
74
- 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
75
- 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
76
- 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
77
- }
78
-
79
- # Exclude non-amino acid residues
80
- sequence = "".join(
81
- aa_dict[residue.get_resname().strip()]
82
- for residue in chain
83
- if residue.get_resname().strip() in aa_dict
84
- )
85
- sequence2 = [
86
- (res.id[1], res) for res in chain
87
- if res.get_resname().strip() in aa_dict
88
- ]
89
 
90
  # Prepare input for model prediction
91
  input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
92
  with torch.no_grad():
93
  outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
94
-
95
  # Calculate scores and normalize them
96
  scores = expit(outputs[:, 1] - outputs[:, 0])
97
  normalized_scores = normalize_scores(scores)
98
-
99
- # Zip residues with scores to track the residue ID and score
100
- residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]
101
 
102
- result_str = "\n".join([
103
- f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
104
- for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
105
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- # Save the predictions to a file
108
  prediction_file = f"{pdb_id}_predictions.txt"
109
  with open(prediction_file, "w") as f:
110
  f.write(result_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- return result_str, molecule(pdb_path, residue_scores, segment), prediction_file
 
113
 
114
  def molecule(input_pdb, residue_scores=None, segment='A'):
115
  mol = read_mol(input_pdb) # Read PDB file content
116
-
117
  # Prepare high-scoring residues script if scores are provided
118
  high_score_script = ""
119
  if residue_scores is not None:
120
- # Sort residues based on their scores
121
  high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
122
  mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
123
 
124
  high_score_script = """
125
- // Reset all styles first
126
- viewer.getModel(0).setStyle({}, {});
127
-
128
- // Show only the selected chain
129
- viewer.getModel(0).setStyle(
130
  {"chain": "%s"},
131
- { cartoon: {colorscheme:"whiteCarbon"} }
132
  );
133
-
134
- // Highlight high-scoring residues only for the selected chain
135
- let highScoreResidues = [%s];
136
- viewer.getModel(0).setStyle(
137
- {"chain": "%s", "resi": highScoreResidues},
 
138
  {"stick": {"color": "red"}}
139
  );
140
 
141
- // Highlight medium-scoring residues only for the selected chain
142
- let midScoreResidues = [%s];
143
- viewer.getModel(0).setStyle(
144
- {"chain": "%s", "resi": midScoreResidues},
 
145
  {"stick": {"color": "orange"}}
146
  );
147
- """ % (segment,
148
- ", ".join(str(resi) for resi in high_score_residues),
149
- segment,
150
- ", ".join(str(resi) for resi in mid_score_residues),
151
- segment)
 
 
152
 
 
153
  html_content = f"""
154
  <!DOCTYPE html>
155
  <html>
@@ -173,13 +309,6 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
173
  let element = $("#container");
174
  let config = {{ backgroundColor: "white" }};
175
  let viewer = $3Dmol.createViewer(element, config);
176
- viewer.addModel(pdb, "pdb");
177
-
178
- // Reset all styles and show only selected chain
179
- viewer.getModel(0).setStyle(
180
- {{"chain": "{segment}"}},
181
- {{ cartoon: {{ colorscheme:"whiteCarbon" }} }}
182
- );
183
 
184
  {high_score_script}
185
 
@@ -221,39 +350,50 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
221
  # Return the HTML content within an iframe safely encoded for special characters
222
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
223
 
224
- reps = [
225
- {
226
- "model": 0,
227
- "style": "cartoon",
228
- "color": "whiteCarbon",
229
- "residue_range": "",
230
- "around": 0,
231
- "byres": False,
232
- }
233
- ]
234
 
235
  # Gradio UI
236
  with gr.Blocks() as demo:
237
  gr.Markdown("# Protein Binding Site Prediction")
 
238
  with gr.Row():
239
- pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
240
  visualize_btn = gr.Button("Visualize Structure")
241
 
242
- molecule_output2 = Molecule3D(label="Protein Structure", reps=reps)
 
 
 
 
 
 
 
 
 
243
 
244
  with gr.Row():
245
- #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
246
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
247
  prediction_btn = gr.Button("Predict Binding Site")
248
 
 
249
  molecule_output = gr.HTML(label="Protein Structure")
250
  predictions_output = gr.Textbox(label="Binding Site Predictions")
251
- download_output = gr.File(label="Download Predictions")
252
-
253
- visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)
254
-
255
- prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  gr.Markdown("## Examples")
258
  gr.Examples(
259
  examples=[
 
1
  import gradio as gr
2
  import requests
3
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO
4
+ from Bio.PDB.Polypeptide import is_aa
5
+ from Bio.SeqUtils import seq1
6
+ from typing import Optional, Tuple
7
  import numpy as np
8
  import os
9
  from gradio_molecule3d import Molecule3D
 
28
 
29
  from scipy.special import expit
30
 
31
+
32
+
33
  # Load model and move to device
34
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
35
  max_length = 1500
 
42
  min_score = np.min(scores)
43
  max_score = np.max(scores)
44
  return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
45
+
46
  def read_mol(pdb_path):
47
  """Read PDB file and return its content as a string"""
48
  with open(pdb_path, 'r') as f:
49
  return f.read()
50
 
51
def fetch_structure(pdb_id: str, output_dir: str = ".") -> Optional[str]:
    """
    Return a local path to the structure file for ``pdb_id``.

    The lookup is delegated to ``download_structure``, which tries the CIF
    format before the PDB format and reuses a file that already exists in
    ``output_dir``. Any falsy result is reported as ``None``.
    """
    return download_structure(pdb_id, output_dir) or None
61
 
62
def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:
    """
    Locate or download the structure file for ``pdb_id``.

    For the CIF and then the PDB format, a file already present in
    ``output_dir`` is reused; otherwise the file is requested from
    files.rcsb.org and written into ``output_dir``.

    Args:
        pdb_id: Entry identifier. Surrounding whitespace is ignored. Only
            letters, digits and underscores are accepted, because the value
            is interpolated into both a file path and a URL.
        output_dir: Directory searched for existing files and written to.

    Returns:
        Path of the local structure file, or ``None`` when the identifier
        is rejected or neither format could be obtained.
    """
    pdb_id = pdb_id.strip() if isinstance(pdb_id, str) else ""
    # Reject anything that could escape output_dir or alter the URL.
    if not pdb_id or not pdb_id.replace("_", "").isalnum():
        return None

    for ext in ('.cif', '.pdb'):
        file_path = os.path.join(output_dir, f"{pdb_id}{ext}")
        if os.path.exists(file_path):
            return file_path
        url = f"https://files.rcsb.org/download/{pdb_id}{ext}"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                with open(file_path, 'wb') as f:
                    f.write(response.content)
                return file_path
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and fall through to the next format.
            print(f"Download error for {pdb_id}{ext}: {e}")
    return None
81
+
82
def convert_cif_to_pdb(cif_path: str, output_dir: str = ".") -> str:
    """
    Convert a CIF file to PDB format using BioPython and return the PDB file path.

    Args:
        cif_path: Path of the mmCIF file to read.
        output_dir: Directory in which the ``.pdb`` file is written.

    Returns:
        Path of the written PDB file: the input's base name with its final
        extension replaced by ``.pdb``, placed in ``output_dir``.
    """
    # Swap only the final extension; replacing every '.cif' substring could
    # mangle the name or, when none matches, point back at the input file.
    stem = os.path.splitext(os.path.basename(cif_path))[0]
    pdb_path = os.path.join(output_dir, f"{stem}.pdb")

    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('protein', cif_path)

    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    return pdb_path
93
+
94
def fetch_pdb(pdb_id):
    """
    Return the path of a PDB-format file for ``pdb_id``, or ``None``.

    The structure is obtained through ``fetch_structure``; when the file it
    returns has a ``.cif`` extension it is converted with
    ``convert_cif_to_pdb`` and the converted file's path is returned.
    """
    structure_path = fetch_structure(pdb_id)
    if not structure_path:
        return None
    if os.path.splitext(structure_path)[1] == '.cif':
        return convert_cif_to_pdb(structure_path)
    return structure_path
102
+
103
def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
    """
    Write a PDB file holding only ``chain_id`` with scores stored as B-factors.

    Args:
        input_pdb: Path of the PDB file to read.
        chain_id: Identifier of the chain to keep; all other chains are dropped.
        residue_scores: ``(residue_number, score)`` pairs. Atoms of residues
            whose number is listed get the score as their B-factor; other
            residues keep the B-factor read from the file.

    Returns:
        Path of the written file, ``<input stem>_<chain_id>_scored.pdb``.
    """
    source = PDBParser(QUIET=True).get_structure('protein', input_pdb)
    trimmed = source.copy()
    score_by_number = dict(residue_scores)

    for model in trimmed:
        # Collect the ids first so the model is not modified while iterated.
        unwanted_ids = [chain.id for chain in model if chain.id != chain_id]
        for unwanted in unwanted_ids:
            model.detach_child(unwanted)

        for chain in model:
            for residue in chain:
                number = residue.id[1]
                if number not in score_by_number:
                    continue
                # The raw score is stored unscaled.
                for atom in residue:
                    atom.bfactor = score_by_number[number]

    output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
    writer = PDBIO()
    writer.set_structure(trimmed)
    writer.save(output_pdb)
    return output_pdb
135
+
136
def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):
    """
    Calculate the geometric center of high-scoring residues.

    Each selected residue is represented by its alpha carbon (``CA``);
    selected residues without a ``CA`` atom are ignored.

    Args:
        pdb_path: Path of the PDB file to read.
        high_score_residues: Residue numbers to include.
        chain_id: Identifier of the chain the residues are taken from.

    Returns:
        The mean of the collected ``CA`` coordinates, or ``None`` when the
        selection is empty or no ``CA`` atom matched.
    """
    # An empty selection can never produce a center, so skip parsing.
    if not high_score_residues:
        return None
    # Set membership keeps the per-residue check constant-time.
    selected = set(high_score_residues)

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('protein', pdb_path)

    coords = []
    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            for residue in chain:
                if residue.id[1] in selected and 'CA' in residue:
                    coords.append(residue['CA'].coord)

    if not coords:
        return None
    return np.mean(coords, axis=0)
159
+
160
+
161
+
162
def process_pdb(pdb_id_or_file, segment):
    """
    Predict per-residue binding-site scores for one chain of a structure.

    Args:
        pdb_id_or_file: A PDB ID to fetch, or a path ending in ``.pdb`` that
            is used directly.
        segment: Identifier of the chain to score.

    Returns:
        A 3-tuple ``(text, html, files)``. ``text`` is the per-residue report
        followed by suggested PyMOL commands, ``html`` is the viewer markup
        returned by ``molecule``, and ``files`` lists the prediction text
        file and the scored PDB file. On failure the tuple is
        ``(error message, None, None)``.
    """
    # Determine if input is a PDB ID or file path
    if pdb_id_or_file.endswith('.pdb'):
        pdb_path = pdb_id_or_file
        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
    else:
        pdb_id = pdb_id_or_file
        pdb_path = fetch_pdb(pdb_id)

    if not pdb_path:
        return "Failed to fetch PDB file", None, None

    # Determine the file format and choose the appropriate parser
    _, ext = os.path.splitext(pdb_path)
    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)

    try:
        structure = parser.get_structure('protein', pdb_path)
    except Exception as e:
        return f"Error parsing structure file: {e}", None, None

    # Extract the specified chain
    try:
        chain = structure[0][segment]
    except KeyError:
        return "Invalid Chain ID", None, None

    protein_residues = [res for res in chain if is_aa(res)]
    if not protein_residues:
        # Nothing to score; an empty sequence would otherwise reach the model
        # and fail when its output is indexed.
        return f"No amino acid residues found in chain {segment}", None, None
    sequence = "".join(seq1(res.resname) for res in protein_residues)
    sequence_id = [res.id[1] for res in protein_residues]

    # Prepare input for model prediction
    input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()

    # Calculate scores and normalize them
    scores = expit(outputs[:, 1] - outputs[:, 0])
    normalized_scores = normalize_scores(scores)

    # Pair each residue number with its score
    residue_scores = list(zip(sequence_id, normalized_scores))

    # Identify high-scoring residues
    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]

    # Calculate geometric center of high-scoring residues
    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)

    # An empty residue list would give the malformed selection
    # "resi  and chain X"; use PyMOL's empty selection instead.
    if high_score_residues:
        selection_expr = f"resi {'+'.join(map(str, high_score_residues))} and chain {segment}"
    else:
        selection_expr = "none"
    pymol_selection = f"select high_score_residues, {selection_expr}"
    pymol_center_cmd = f"show spheres, {selection_expr}" if geo_center is not None else ""

    # Generate the result string
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
    result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
    result_str += "\n".join([
        f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
        for i, res in enumerate(protein_residues)])

    # Write the prediction report
    prediction_file = f"{pdb_id}_predictions.txt"
    with open(prediction_file, "w") as f:
        f.write(result_str)

    # Create chain-specific PDB with scores in B-factor
    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)

    # Molecule visualization
    mol_vis = molecule(pdb_path, residue_scores, segment)

    # Construct PyMOL command suggestions
    pymol_commands = f"""
PyMOL Visualization Commands:
1. Load PDB: load {os.path.abspath(pdb_path)}
2. Select high-scoring residues: {pymol_selection}
3. Highlight high-scoring residues: show sticks, high_score_residues
{pymol_center_cmd}
"""

    return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
244
+
245
 
246
  def molecule(input_pdb, residue_scores=None, segment='A'):
247
  mol = read_mol(input_pdb) # Read PDB file content
248
+
249
  # Prepare high-scoring residues script if scores are provided
250
  high_score_script = ""
251
  if residue_scores is not None:
252
+ # Filter residues based on their scores
253
  high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
254
  mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
255
 
256
  high_score_script = """
257
+ // Load the original model and apply white cartoon style
258
+ let chainModel = viewer.addModel(pdb, "pdb");
259
+ chainModel.setStyle({}, {});
260
+ chainModel.setStyle(
 
261
  {"chain": "%s"},
262
+ {"cartoon": {"color": "white"}}
263
  );
264
+
265
+ // Create a new model for high-scoring residues and apply red sticks style
266
+ let highScoreModel = viewer.addModel(pdb, "pdb");
267
+ highScoreModel.setStyle({}, {});
268
+ highScoreModel.setStyle(
269
+ {"chain": "%s", "resi": [%s]},
270
  {"stick": {"color": "red"}}
271
  );
272
 
273
+ // Create a new model for medium-scoring residues and apply orange sticks style
274
+ let midScoreModel = viewer.addModel(pdb, "pdb");
275
+ midScoreModel.setStyle({}, {});
276
+ midScoreModel.setStyle(
277
+ {"chain": "%s", "resi": [%s]},
278
  {"stick": {"color": "orange"}}
279
  );
280
+ """ % (
281
+ segment,
282
+ segment,
283
+ ", ".join(str(resi) for resi in high_score_residues),
284
+ segment,
285
+ ", ".join(str(resi) for resi in mid_score_residues)
286
+ )
287
 
288
+ # Generate the full HTML content
289
  html_content = f"""
290
  <!DOCTYPE html>
291
  <html>
 
309
  let element = $("#container");
310
  let config = {{ backgroundColor: "white" }};
311
  let viewer = $3Dmol.createViewer(element, config);
 
 
 
 
 
 
 
312
 
313
  {high_score_script}
314
 
 
350
  # Return the HTML content within an iframe safely encoded for special characters
351
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
352
 
 
 
 
 
 
 
 
 
 
 
353
 
354
  # Gradio UI
355
  with gr.Blocks() as demo:
356
  gr.Markdown("# Protein Binding Site Prediction")
357
+
358
  with gr.Row():
359
+ pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
360
  visualize_btn = gr.Button("Visualize Structure")
361
 
362
+ molecule_output2 = Molecule3D(label="Protein Structure", reps=[
363
+ {
364
+ "model": 0,
365
+ "style": "cartoon",
366
+ "color": "whiteCarbon",
367
+ "residue_range": "",
368
+ "around": 0,
369
+ "byres": False,
370
+ }
371
+ ])
372
 
373
  with gr.Row():
 
374
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
375
  prediction_btn = gr.Button("Predict Binding Site")
376
 
377
+
378
  molecule_output = gr.HTML(label="Protein Structure")
379
  predictions_output = gr.Textbox(label="Binding Site Predictions")
380
+ download_output = gr.File(label="Download Files", file_count="multiple")
 
 
 
 
381
 
382
+ prediction_btn.click(
383
+ process_pdb,
384
+ inputs=[
385
+ pdb_input,
386
+ segment_input
387
+ ],
388
+ outputs=[predictions_output, molecule_output, download_output]
389
+ )
390
+
391
+ visualize_btn.click(
392
+ fetch_pdb,
393
+ inputs=[pdb_input],
394
+ outputs=molecule_output2
395
+ )
396
+
397
  gr.Markdown("## Examples")
398
  gr.Examples(
399
  examples=[
.ipynb_checkpoints/test3-checkpoint.ipynb ADDED
@@ -0,0 +1,1599 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 18,
6
+ "id": "2b84eb4e-3f91-4a28-8e4f-322a34a9fb55",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "* Running on local URL: http://127.0.0.1:7877\n",
14
+ "* Running on public URL: https://a35567ec94eccaf8d1.gradio.live\n",
15
+ "\n",
16
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
17
+ ]
18
+ },
19
+ {
20
+ "data": {
21
+ "text/html": [
22
+ "<div><iframe src=\"https://a35567ec94eccaf8d1.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
23
+ ],
24
+ "text/plain": [
25
+ "<IPython.core.display.HTML object>"
26
+ ]
27
+ },
28
+ "metadata": {},
29
+ "output_type": "display_data"
30
+ },
31
+ {
32
+ "data": {
33
+ "text/plain": []
34
+ },
35
+ "execution_count": 18,
36
+ "metadata": {},
37
+ "output_type": "execute_result"
38
+ }
39
+ ],
40
+ "source": [
41
+ "from Bio.PDB import PDBParser, MMCIFParser, MMCIF2Dict, PDBIO\n",
42
+ "from Bio.PDB.Polypeptide import is_aa\n",
43
+ "from Bio.SeqUtils import seq1\n",
44
+ "import gradio as gr\n",
45
+ "import numpy as np\n",
46
+ "import os\n",
47
+ "import requests\n",
48
+ "from gradio_molecule3d import Molecule3D\n",
49
+ "from scipy.special import expit\n",
50
+ "from typing import Optional\n",
51
+ "\n",
52
+ "def normalize_scores(scores):\n",
53
+ " min_score = np.min(scores)\n",
54
+ " max_score = np.max(scores)\n",
55
+ " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
56
+ "\n",
57
+ "def read_mol(pdb_path):\n",
58
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
59
+ " with open(pdb_path, 'r') as f:\n",
60
+ " return f.read()\n",
61
+ "\n",
62
+ "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
63
+ " \"\"\"\n",
64
+ " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
65
+ " If a structure file already exists locally, it uses that.\n",
66
+ " \"\"\"\n",
67
+ " file_path = download_structure(pdb_id, output_dir)\n",
68
+ " if file_path:\n",
69
+ " return file_path\n",
70
+ " else:\n",
71
+ " return None\n",
72
+ "\n",
73
+ "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
74
+ " \"\"\"\n",
75
+ " Attempt to download the structure file in CIF or PDB format.\n",
76
+ " Returns the path to the downloaded file, or None if download fails.\n",
77
+ " \"\"\"\n",
78
+ " for ext in ['.cif', '.pdb']:\n",
79
+ " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
80
+ " if os.path.exists(file_path):\n",
81
+ " return file_path\n",
82
+ " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
83
+ " try:\n",
84
+ " response = requests.get(url, timeout=10)\n",
85
+ " if response.status_code == 200:\n",
86
+ " with open(file_path, 'wb') as f:\n",
87
+ " f.write(response.content)\n",
88
+ " return file_path\n",
89
+ " except Exception as e:\n",
90
+ " print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
91
+ " return None\n",
92
+ "\n",
93
+ "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
94
+ " \"\"\"\n",
95
+ " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
96
+ " \"\"\"\n",
97
+ " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
98
+ " parser = MMCIFParser(QUIET=True)\n",
99
+ " structure = parser.get_structure('protein', cif_path)\n",
100
+ " io = PDBIO()\n",
101
+ " io.set_structure(structure)\n",
102
+ " io.save(pdb_path)\n",
103
+ " return pdb_path\n",
104
+ "\n",
105
+ "def fetch_pdb(pdb_id):\n",
106
+ " pdb_path = fetch_structure(pdb_id)\n",
107
+ " if not pdb_path:\n",
108
+ " return None\n",
109
+ " _, ext = os.path.splitext(pdb_path)\n",
110
+ " if ext == '.cif':\n",
111
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
112
+ " return pdb_path\n",
113
+ "\n",
114
+ "def process_pdb(pdb_id, segment):\n",
115
+ " # Fetch the PDB or CIF file\n",
116
+ " pdb_path = fetch_pdb(pdb_id)\n",
117
+ " if not pdb_path:\n",
118
+ " return \"Failed to fetch PDB file\", None, None\n",
119
+ " \n",
120
+ " # Determine the file format and choose the appropriate parser\n",
121
+ " _, ext = os.path.splitext(pdb_path)\n",
122
+ " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
123
+ " \n",
124
+ " try:\n",
125
+ " # Parse the structure file\n",
126
+ " structure = parser.get_structure('protein', pdb_path)\n",
127
+ " except Exception as e:\n",
128
+ " return f\"Error parsing structure file: {e}\", None, None\n",
129
+ " \n",
130
+ " # Extract the specified chain\n",
131
+ " try:\n",
132
+ " chain = structure[0][segment]\n",
133
+ " except KeyError:\n",
134
+ " return \"Invalid Chain ID\", None, None\n",
135
+ " \n",
136
+ " protein_residues = [res for res in chain if is_aa(res)]\n",
137
+ " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
138
+ " sequence_id = [res.id[1] for res in protein_residues]\n",
139
+ " \n",
140
+ " # Generate random scores for residues\n",
141
+ " scores = np.random.rand(len(sequence))\n",
142
+ " normalized_scores = normalize_scores(scores)\n",
143
+ " \n",
144
+ " # Zip residues with scores to track the residue ID and score\n",
145
+ " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
146
+ "\n",
147
+ " # Generate the result string\n",
148
+ " result_str = \"\\n\".join([\n",
149
+ " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
150
+ " for i, res in enumerate(protein_residues)])\n",
151
+ " \n",
152
+ " # Save the predictions to a file\n",
153
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
154
+ " with open(prediction_file, \"w\") as f:\n",
155
+ " f.write(result_str)\n",
156
+ "\n",
157
+ " _, ext = os.path.splitext(pdb_path)\n",
158
+ " if ext == '.cif':\n",
159
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
160
+ "\n",
161
+ " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
162
+ "\n",
163
def molecule(input_pdb, residue_scores=None, segment='A'):
    """Build an <iframe> that embeds a 3Dmol.js viewer for a PDB file.

    Args:
        input_pdb: Path to the PDB file whose text is embedded in the page.
        residue_scores: Optional iterable of (residue_number, score) pairs.
            Residues scoring above 0.75 are drawn as red sticks and those in
            (0.5, 0.75] as orange sticks. When None, only the chain cartoon
            is styled.
        segment: Chain ID the styles are restricted to.

    Returns:
        An HTML string holding one <iframe> whose srcdoc is the viewer page.
    """
    import html
    import json

    def js_literal(value):
        """Serialise *value* as a JavaScript literal safe inside <script>."""
        # json.dumps output is a valid JS literal; rewriting "</" as "<\/"
        # keeps a "</script>" inside the data from terminating the tag.
        return json.dumps(value).replace("</", "<\\/")

    # The file text and chain ID can originate from user input, so they are
    # embedded as escaped literals instead of being pasted into the script
    # (a backtick, "${" or backslash would corrupt a template literal).
    pdb_js = js_literal(read_mol(input_pdb))
    chain_js = js_literal(segment)

    high_score_script = ""
    if residue_scores is not None:
        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]

        high_score_script = """
        // Reset all styles first
        viewer.getModel(0).setStyle({}, {});

        // Show only the selected chain
        viewer.getModel(0).setStyle(
            {"chain": %s},
            { cartoon: {colorscheme:"whiteCarbon"} }
        );

        // Highlight high-scoring residues only for the selected chain
        let highScoreResidues = [%s];
        viewer.getModel(0).setStyle(
            {"chain": %s, "resi": highScoreResidues},
            {"stick": {"color": "red"}}
        );

        // Highlight medium-scoring residues only for the selected chain
        let midScoreResidues = [%s];
        viewer.getModel(0).setStyle(
            {"chain": %s, "resi": midScoreResidues},
            {"stick": {"color": "orange"}}
        );
        """ % (
            chain_js,
            ", ".join(str(resi) for resi in high_score_residues),
            chain_js,
            ", ".join(str(resi) for resi in mid_score_residues),
            chain_js,
        )

    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
        <style>
        .mol-container {{
            width: 100%;
            height: 700px;
            position: relative;
        }}
        </style>
        <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js"></script>
        <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
    </head>
    <body>
        <div id="container" class="mol-container"></div>
        <script>
            let pdb = {pdb_js}; // JSON-encoded PDB text
            $(document).ready(function () {{
                let element = $("#container");
                let config = {{ backgroundColor: "white" }};
                let viewer = $3Dmol.createViewer(element, config);
                viewer.addModel(pdb, "pdb");

                // Reset all styles and show only selected chain
                viewer.getModel(0).setStyle(
                    {{"chain": {chain_js}}},
                    {{ cartoon: {{ colorscheme:"whiteCarbon" }} }}
                );

                {high_score_script}

                // Add hover functionality
                viewer.setHoverable(
                    {{}},
                    true,
                    function(atom, viewer, event, container) {{
                        if (!atom.label) {{
                            atom.label = viewer.addLabel(
                                atom.resn + ":" +atom.resi + ":" + atom.atom,
                                {{
                                    position: atom,
                                    backgroundColor: 'mintcream',
                                    fontColor: 'black',
                                    fontSize: 12,
                                    padding: 2
                                }}
                            );
                        }}
                    }},
                    function(atom, viewer) {{
                        if (atom.label) {{
                            viewer.removeLabel(atom.label);
                            delete atom.label;
                        }}
                    }}
                );

                viewer.zoomTo();
                viewer.render();
                viewer.zoom(0.8, 2000);
            }});
        </script>
    </body>
    </html>
    """

    # srcdoc is an HTML attribute: escape &, <, > and both quote styles so
    # the browser decodes it back to exactly the document built above.
    srcdoc = html.escape(html_content, quote=True)
    return f'<iframe width="100%" height="700" srcdoc="{srcdoc}"></iframe>'
272
+ "\n",
273
# Default Molecule3D representation: the whole model as a white-carbon cartoon.
reps = [
    {
        "model": 0,
        "style": "cartoon",
        "color": "whiteCarbon",
        "residue_range": "",
        "around": 0,
        "byres": False,
    }
]

# Gradio UI: a PDB ID box with a structure preview, and a chain box that
# triggers the binding-site prediction and exposes the results for download.
with gr.Blocks() as demo:
    gr.Markdown("# Protein Binding Site Prediction")
    with gr.Row():
        pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
        visualize_btn = gr.Button("Visualize Structure")

    molecule_output2 = Molecule3D(label="Protein Structure", reps=reps)

    with gr.Row():
        segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
        prediction_btn = gr.Button("Predict Binding Site")

    molecule_output = gr.HTML(label="Protein Structure")
    predictions_output = gr.Textbox(label="Binding Site Predictions")
    download_output = gr.File(label="Download Predictions")

    visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)

    prediction_btn.click(
        process_pdb,
        inputs=[pdb_input, segment_input],
        outputs=[predictions_output, molecule_output, download_output],
    )

    gr.Markdown("## Examples")
    gr.Examples(
        examples=[
            ["7RPZ", "A"],
            ["2IWI", "B"],
            ["2F6V", "A"],
        ],
        inputs=[pdb_input, segment_input],
        outputs=[predictions_output, molecule_output, download_output],
    )

# Only start the (publicly shared) server when run as a script or notebook
# cell, not as a side effect of importing this module.
if __name__ == "__main__":
    demo.launch(share=True)
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": 20,
322
+ "id": "a2f1ca04-7a27-4e4f-b44d-39b20c5d034a",
323
+ "metadata": {},
324
+ "outputs": [
325
+ {
326
+ "name": "stdout",
327
+ "output_type": "stream",
328
+ "text": [
329
+ "* Running on local URL: http://127.0.0.1:7878\n",
330
+ "* Running on public URL: https://fbfb00e893a2d7c6ae.gradio.live\n",
331
+ "\n",
332
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
333
+ ]
334
+ },
335
+ {
336
+ "data": {
337
+ "text/html": [
338
+ "<div><iframe src=\"https://fbfb00e893a2d7c6ae.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
339
+ ],
340
+ "text/plain": [
341
+ "<IPython.core.display.HTML object>"
342
+ ]
343
+ },
344
+ "metadata": {},
345
+ "output_type": "display_data"
346
+ },
347
+ {
348
+ "data": {
349
+ "text/plain": []
350
+ },
351
+ "execution_count": 20,
352
+ "metadata": {},
353
+ "output_type": "execute_result"
354
+ }
355
+ ],
356
+ "source": [
357
+ "import os\n",
358
+ "from datetime import datetime\n",
359
+ "import gradio as gr\n",
360
+ "import numpy as np\n",
361
+ "import requests\n",
362
+ "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
363
+ "from Bio.PDB.Polypeptide import is_aa\n",
364
+ "from Bio.SeqUtils import seq1\n",
365
+ "from gradio_molecule3d import Molecule3D\n",
366
+ "from typing import Optional, Tuple\n",
367
+ "\n",
368
def normalize_scores(scores):
    """Min-max scale *scores* into the range [0, 1].

    Args:
        scores: Array-like of numeric scores (list, tuple or NumPy array).

    Returns:
        A NumPy float array with the same shape as the input. When the input
        is empty, or all values are identical (no range to scale by), the
        values are returned unscaled.
    """
    values = np.asarray(scores, dtype=float)
    # np.min / np.max raise ValueError on an empty array, so bail out first.
    if values.size == 0:
        return values
    low = values.min()
    high = values.max()
    if high > low:
        return (values - low) / (high - low)
    return values
372
+ "\n",
373
def read_mol(pdb_path):
    """Read the structure file at *pdb_path* and return its text.

    The file is decoded as UTF-8 regardless of the platform default, and
    undecodable bytes are replaced rather than raising, so a stray byte in
    an uploaded file does not abort the read.
    """
    with open(pdb_path, "r", encoding="utf-8", errors="replace") as handle:
        return handle.read()
377
+ "\n",
378
def fetch_structure(pdb_id: str, output_dir: str = ".") -> Optional[str]:
    """Return the path to a structure file for *pdb_id*, or None.

    Delegates to download_structure(), which tries the CIF file before the
    PDB file and reuses a copy already present in *output_dir*.

    Args:
        pdb_id: PDB identifier; surrounding whitespace is ignored.
        output_dir: Directory that is searched and downloaded into.

    Returns:
        The file path, or None when the ID is blank or nothing was obtained.
    """
    pdb_id = (pdb_id or "").strip()
    if not pdb_id:
        # A blank ID can never resolve; skip the pointless HTTP round trips.
        return None
    return download_structure(pdb_id, output_dir) or None
388
+ "\n",
389
def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:
    """Locate or download the structure file for *pdb_id*.

    For each of ".cif" then ".pdb": return the file if it already exists in
    *output_dir*, otherwise try to download it from files.rcsb.org.

    Args:
        pdb_id: PDB identifier (letters, digits and underscores only).
        output_dir: Directory used as cache and download target.

    Returns:
        Path to the structure file, or None if the ID is invalid or neither
        format could be obtained.
    """
    import re  # local import keeps this block self-contained

    # The ID is interpolated into a filesystem path and a URL, so reject
    # anything (path separators, dots, whitespace) that is not a plain ID.
    if not isinstance(pdb_id, str) or not re.fullmatch(r"[A-Za-z0-9_]+", pdb_id):
        print(f"Download error: invalid PDB ID {pdb_id!r}")
        return None

    for ext in ('.cif', '.pdb'):
        file_path = os.path.join(output_dir, f"{pdb_id}{ext}")
        if os.path.exists(file_path):
            return file_path
        url = f"https://files.rcsb.org/download/{pdb_id}{ext}"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)
                with open(file_path, 'wb') as f:
                    f.write(response.content)
                return file_path
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and fall through to the next format.
            print(f"Download error for {pdb_id}{ext}: {e}")
    return None
408
+ "\n",
409
def convert_cif_to_pdb(cif_path: str, output_dir: str = ".") -> str:
    """Convert an mmCIF file to PDB format with BioPython.

    Args:
        cif_path: Path to the input mmCIF file.
        output_dir: Directory the PDB file is written to.

    Returns:
        Path of the written file: the input's base name with its extension
        replaced by ".pdb", inside *output_dir*.
    """
    # splitext swaps only the final extension; str.replace('.cif', '.pdb')
    # would rewrite every ".cif" in the name and leave names with another
    # extension unchanged.
    stem = os.path.splitext(os.path.basename(cif_path))[0]
    pdb_path = os.path.join(output_dir, f"{stem}.pdb")

    structure = MMCIFParser(QUIET=True).get_structure('protein', cif_path)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_path)
    return pdb_path
420
+ "\n",
421
def fetch_pdb(pdb_id):
    """Return the path to a PDB-format file for *pdb_id*, or None.

    The structure is obtained through fetch_structure(); when that yields an
    mmCIF file it is converted with convert_cif_to_pdb().

    Args:
        pdb_id: PDB identifier; surrounding whitespace is ignored.

    Returns:
        Path to a PDB file, or None if the ID is blank or nothing was fetched.
    """
    pdb_id = (pdb_id or "").strip()
    if not pdb_id:
        return None
    pdb_path = fetch_structure(pdb_id)
    if not pdb_path:
        return None
    if os.path.splitext(pdb_path)[1].lower() == '.cif':
        pdb_path = convert_cif_to_pdb(pdb_path)
    return pdb_path
429
+ "\n",
430
def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
    """Write a PDB holding only *chain_id*, with scores in the B-factor column.

    Args:
        input_pdb: Path to the source PDB file.
        chain_id: ID of the chain to keep; all other chains are dropped.
        residue_scores: Iterable of (residue_number, score) pairs.

    Returns:
        Path of the written file, "<input stem>_<chain_id>_scored.pdb".
    """
    # The structure is parsed fresh and used only here, so it is edited in
    # place; a deep copy would just double the work.
    structure = PDBParser(QUIET=True).get_structure('protein', input_pdb)
    scores_by_number = {resi: float(score) for resi, score in residue_scores}

    for model in structure:
        # Collect IDs first: detaching while iterating the model would skip chains.
        for unwanted in [chain.id for chain in model if chain.id != chain_id]:
            model.detach_child(unwanted)

        for chain in model:
            # Iterate the unpacked lists, as PDBIO.save() does, so every
            # alternate location / disordered residue is updated; assigning
            # to a DisorderedAtom wrapper would not reach the written atoms.
            for residue in chain.get_unpacked_list():
                # Scores exist only for amino-acid residues; skip waters and
                # ligands that merely share a residue number.
                if not is_aa(residue):
                    continue
                score = scores_by_number.get(residue.id[1])
                if score is None:
                    continue
                for atom in residue.get_unpacked_list():
                    atom.set_bfactor(score)

    output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_pdb)
    return output_pdb
462
+ "\n",
463
def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):
    """Return the mean CA position of the given residues, or None.

    Args:
        pdb_path: Path to the PDB file to parse.
        high_score_residues: Residue numbers to include.
        chain_id: Chain in which the residues are looked up.

    Returns:
        NumPy array with the mean CA coordinate over all matching residues
        (in every model of the file), or None when no CA atom matched.
    """
    structure = PDBParser(QUIET=True).get_structure('protein', pdb_path)
    wanted = set(high_score_residues)  # O(1) membership instead of a list scan

    coords = []
    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            for residue in chain:
                # The alpha carbon stands in for the whole residue.
                if residue.id[1] in wanted and 'CA' in residue:
                    coords.append(residue['CA'].coord)

    if not coords:
        return None
    return np.mean(coords, axis=0)
486
+ "\n",
487
def process_pdb(pdb_id_or_file, segment):
    """Score one chain of a structure and build the outputs shown in the UI.

    The per-residue scores are random placeholders (np.random.rand) that are
    min-max normalised. A text report and a chain-only PDB carrying the
    scores as B-factors are written to disk.

    Args:
        pdb_id_or_file: A PDB ID, or a path ending in ".pdb".
        segment: Chain ID to analyse.

    Returns:
        A 3-tuple (report text, viewer HTML, [report path, scored PDB path]).
        On failure the first element is an error message and the other two
        are None.
    """
    # Nothing usable was supplied (None, non-string or blank).
    if not isinstance(pdb_id_or_file, str) or not pdb_id_or_file.strip():
        return "Failed to fetch PDB file", None, None
    source = pdb_id_or_file.strip()

    # Determine if input is a PDB ID or file path
    if source.endswith('.pdb'):
        pdb_path = source
        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
    else:
        pdb_id = source
        pdb_path = fetch_pdb(pdb_id)

    if not pdb_path:
        return "Failed to fetch PDB file", None, None

    # Determine the file format and choose the appropriate parser
    _, ext = os.path.splitext(pdb_path)
    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)

    try:
        structure = parser.get_structure('protein', pdb_path)
    except Exception as e:  # reported to the user instead of crashing the UI
        return f"Error parsing structure file: {e}", None, None

    try:
        chain = structure[0][segment]
    except KeyError:
        return "Invalid Chain ID", None, None

    protein_residues = [res for res in chain if is_aa(res)]
    if not protein_residues:
        # e.g. a nucleic-acid or ligand-only chain: there is nothing to score.
        return f"No amino-acid residues found in chain {segment}", None, None

    sequence = "".join(seq1(res.resname) for res in protein_residues)
    sequence_id = [res.id[1] for res in protein_residues]

    # Generate random scores for residues
    scores = np.random.rand(len(sequence))
    normalized_scores = normalize_scores(scores)

    # Pair each residue number with its score
    residue_scores = list(zip(sequence_id, normalized_scores))
    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]

    # PyMOL hints; an empty "resi" list would be an invalid selection, so
    # fall back to PyMOL's "none" keyword in that case.
    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
    if high_score_residues:
        resi_expr = '+'.join(map(str, high_score_residues))
        pymol_selection = f"select high_score_residues, resi {resi_expr} and chain {segment}"
        pymol_center_cmd = (
            f"show spheres, resi {resi_expr} and chain {segment}"
            if geo_center is not None else ""
        )
    else:
        pymol_selection = "select high_score_residues, none"
        pymol_center_cmd = ""

    # Generate the result string
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
    result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
    result_str += "\n".join(
        f"{res.resname} {res.id[1]} {letter} {score:.2f}"
        for res, letter, score in zip(protein_residues, sequence, normalized_scores)
    )

    # Create prediction and scored PDB files
    prediction_file = f"{pdb_id}_predictions.txt"
    with open(prediction_file, "w", encoding="utf-8") as f:
        f.write(result_str)

    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)
    mol_vis = molecule(pdb_path, residue_scores, segment)

    # Construct PyMOL command suggestions
    pymol_commands = f"""
PyMOL Visualization Commands:
1. Load PDB: load {os.path.abspath(pdb_path)}
2. Select high-scoring residues: {pymol_selection}
3. Highlight high-scoring residues: show sticks, high_score_residues
{pymol_center_cmd}
"""

    return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
564
+ "\n",
565
+ "# molecule(): builds the 3Dmol.js viewer page. The visualization script is meant to\n",
566
+ "# keep the chain cartoon underneath the stick representations of scored residues.\n",
567
+ "\n",
568
def molecule(input_pdb, residue_scores=None, segment='A'):
    """Build an <iframe> that embeds a 3Dmol.js viewer for a PDB file.

    Args:
        input_pdb: Path to the PDB file whose text is embedded in the page.
        residue_scores: Optional iterable of (residue_number, score) pairs.
            Residues scoring above 0.75 are drawn as red sticks and those in
            (0.5, 0.75] as orange sticks, over a translucent cartoon of the
            chain. When None, no styling script is emitted.
        segment: Chain ID the styles are restricted to.

    Returns:
        An HTML string holding one <iframe> whose srcdoc is the viewer page.
    """
    import html
    import json

    def js_literal(value):
        """Serialise *value* as a JavaScript literal safe inside <script>."""
        # json.dumps output is a valid JS literal; rewriting "</" as "<\/"
        # keeps a "</script>" inside the data from terminating the tag.
        return json.dumps(value).replace("</", "<\\/")

    # The file text and chain ID can originate from user input (upload or
    # text box), so they are embedded as escaped literals instead of being
    # pasted into the script (a backtick, "${" or backslash would corrupt a
    # template literal).
    pdb_js = js_literal(read_mol(input_pdb))
    chain_js = js_literal(segment)

    high_score_script = ""
    if residue_scores is not None:
        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]

        high_score_script = """
        // Reset all styles first
        viewer.getModel(0).setStyle({}, {});

        // First, set background cartoon style for the entire chain (underneath)
        viewer.getModel(0).setStyle(
            {"chain": %s},
            { cartoon: {colorscheme:"whiteCarbon", opacity:0.7} }
        );

        // Highlight high-scoring residues with sticks on top
        let highScoreResidues = [%s];
        viewer.getModel(0).setStyle(
            {"chain": %s, "resi": highScoreResidues},
            {"stick": {"color": "red", "opacity": 1}}
        );

        // Highlight medium-scoring residues
        let midScoreResidues = [%s];
        viewer.getModel(0).setStyle(
            {"chain": %s, "resi": midScoreResidues},
            {"stick": {"color": "orange", "opacity": 0.8}}
        );
        """ % (
            chain_js,
            ", ".join(str(resi) for resi in high_score_residues),
            chain_js,
            ", ".join(str(resi) for resi in mid_score_residues),
            chain_js,
        )

    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
        <style>
        .mol-container {{
            width: 100%;
            height: 700px;
            position: relative;
        }}
        </style>
        <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js"></script>
        <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
    </head>
    <body>
        <div id="container" class="mol-container"></div>
        <script>
            let pdb = {pdb_js}; // JSON-encoded PDB text
            $(document).ready(function () {{
                let element = $("#container");
                let config = {{ backgroundColor: "white" }};
                let viewer = $3Dmol.createViewer(element, config);
                viewer.addModel(pdb, "pdb");

                {high_score_script}

                // Add hover functionality
                viewer.setHoverable(
                    {{}},
                    true,
                    function(atom, viewer, event, container) {{
                        if (!atom.label) {{
                            atom.label = viewer.addLabel(
                                atom.resn + ":" +atom.resi + ":" + atom.atom,
                                {{
                                    position: atom,
                                    backgroundColor: 'mintcream',
                                    fontColor: 'black',
                                    fontSize: 12,
                                    padding: 2
                                }}
                            );
                        }}
                    }},
                    function(atom, viewer) {{
                        if (atom.label) {{
                            viewer.removeLabel(atom.label);
                            delete atom.label;
                        }}
                    }}
                );

                viewer.zoomTo();
                viewer.render();
                viewer.zoom(0.8, 2000);
            }});
        </script>
    </body>
    </html>
    """

    # srcdoc is an HTML attribute: escape &, <, > and both quote styles so
    # the browser decodes it back to exactly the document built above.
    srcdoc = html.escape(html_content, quote=True)
    return f'<iframe width="100%" height="700" srcdoc="{srcdoc}"></iframe>'
673
+ "\n",
674
# Gradio UI
def process_input(pdb_id, uploaded_file):
    """Return the uploaded ".pdb" path when one was provided, else the PDB ID."""
    if uploaded_file and uploaded_file.endswith('.pdb'):
        return uploaded_file
    return pdb_id


def predict_binding_site(pdb_id, uploaded_file, segment):
    """Click handler: pick the structure source, then run process_pdb on it."""
    return process_pdb(process_input(pdb_id, uploaded_file), segment)


with gr.Blocks() as demo:
    gr.Markdown("# Protein Binding Site Prediction")

    with gr.Row():
        pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
        file_input = gr.File(label="Or Upload PDB File", file_types=['.pdb'], type="filepath")
        visualize_btn = gr.Button("Visualize Structure")

    molecule_output2 = Molecule3D(label="Protein Structure", reps=[
        {
            "model": 0,
            "style": "cartoon",
            "color": "whiteCarbon",
            "residue_range": "",
            "around": 0,
            "byres": False,
        }
    ])

    with gr.Row():
        segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
        prediction_btn = gr.Button("Predict Binding Site")

    molecule_output = gr.HTML(label="Protein Structure")
    predictions_output = gr.Textbox(label="Binding Site Predictions")
    download_output = gr.File(label="Download Files", file_count="multiple")

    # Pass the components themselves as inputs so the handler receives what
    # the user typed or uploaded. Wrapping `component.value` in a gr.State
    # only captures the values the components were constructed with, which
    # made every prediction run on the default ID.
    prediction_btn.click(
        predict_binding_site,
        inputs=[pdb_input, file_input, segment_input],
        outputs=[predictions_output, molecule_output, download_output],
    )

    visualize_btn.click(
        fetch_pdb,
        inputs=[pdb_input],
        outputs=molecule_output2,
    )

    gr.Markdown("## Examples")
    gr.Examples(
        examples=[
            ["7RPZ", "A"],
            ["2IWI", "B"],
            ["2F6V", "A"],
        ],
        inputs=[pdb_input, segment_input],
        outputs=[predictions_output, molecule_output, download_output],
    )

# Only start the (publicly shared) server when run as a script or notebook
# cell, not as a side effect of importing this module.
if __name__ == "__main__":
    demo.launch(share=True)
737
+ ]
738
+ },
739
+ {
740
+ "cell_type": "code",
741
+ "execution_count": 32,
742
+ "id": "5b266025-7503-48f5-9371-3642d09f7e93",
743
+ "metadata": {},
744
+ "outputs": [
745
+ {
746
+ "name": "stdout",
747
+ "output_type": "stream",
748
+ "text": [
749
+ "* Running on local URL: http://127.0.0.1:7890\n",
750
+ "* Running on public URL: https://70a6e80d8deb42ddd0.gradio.live\n",
751
+ "\n",
752
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
753
+ ]
754
+ },
755
+ {
756
+ "data": {
757
+ "text/html": [
758
+ "<div><iframe src=\"https://70a6e80d8deb42ddd0.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
759
+ ],
760
+ "text/plain": [
761
+ "<IPython.core.display.HTML object>"
762
+ ]
763
+ },
764
+ "metadata": {},
765
+ "output_type": "display_data"
766
+ },
767
+ {
768
+ "data": {
769
+ "text/plain": []
770
+ },
771
+ "execution_count": 32,
772
+ "metadata": {},
773
+ "output_type": "execute_result"
774
+ }
775
+ ],
776
+ "source": [
777
+ "import os\n",
778
+ "from datetime import datetime\n",
779
+ "import gradio as gr\n",
780
+ "import numpy as np\n",
781
+ "import requests\n",
782
+ "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
783
+ "from Bio.PDB.Polypeptide import is_aa\n",
784
+ "from Bio.SeqUtils import seq1\n",
785
+ "from gradio_molecule3d import Molecule3D\n",
786
+ "from typing import Optional, Tuple\n",
787
+ "\n",
788
def normalize_scores(scores):
    """Min-max scale *scores* into the range [0, 1].

    Args:
        scores: Array-like of numeric scores (list, tuple or NumPy array).

    Returns:
        A NumPy float array with the same shape as the input. When the input
        is empty, or all values are identical (no range to scale by), the
        values are returned unscaled.
    """
    values = np.asarray(scores, dtype=float)
    # np.min / np.max raise ValueError on an empty array, so bail out first.
    if values.size == 0:
        return values
    low = values.min()
    high = values.max()
    if high > low:
        return (values - low) / (high - low)
    return values
792
+ "\n",
793
def read_mol(pdb_path):
    """Read the structure file at *pdb_path* and return its text.

    The file is decoded as UTF-8 regardless of the platform default, and
    undecodable bytes are replaced rather than raising, so a stray byte in
    an uploaded file does not abort the read.
    """
    with open(pdb_path, "r", encoding="utf-8", errors="replace") as handle:
        return handle.read()
797
+ "\n",
798
def fetch_structure(pdb_id: str, output_dir: str = ".") -> Optional[str]:
    """Return the path to a structure file for *pdb_id*, or None.

    Delegates to download_structure(), which tries the CIF file before the
    PDB file and reuses a copy already present in *output_dir*.

    Args:
        pdb_id: PDB identifier; surrounding whitespace is ignored.
        output_dir: Directory that is searched and downloaded into.

    Returns:
        The file path, or None when the ID is blank or nothing was obtained.
    """
    pdb_id = (pdb_id or "").strip()
    if not pdb_id:
        # A blank ID can never resolve; skip the pointless HTTP round trips.
        return None
    return download_structure(pdb_id, output_dir) or None
808
+ "\n",
809
def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:
    """Locate or download the structure file for *pdb_id*.

    For each of ".cif" then ".pdb": return the file if it already exists in
    *output_dir*, otherwise try to download it from files.rcsb.org.

    Args:
        pdb_id: PDB identifier (letters, digits and underscores only).
        output_dir: Directory used as cache and download target.

    Returns:
        Path to the structure file, or None if the ID is invalid or neither
        format could be obtained.
    """
    import re  # local import keeps this block self-contained

    # The ID is interpolated into a filesystem path and a URL, so reject
    # anything (path separators, dots, whitespace) that is not a plain ID.
    if not isinstance(pdb_id, str) or not re.fullmatch(r"[A-Za-z0-9_]+", pdb_id):
        print(f"Download error: invalid PDB ID {pdb_id!r}")
        return None

    for ext in ('.cif', '.pdb'):
        file_path = os.path.join(output_dir, f"{pdb_id}{ext}")
        if os.path.exists(file_path):
            return file_path
        url = f"https://files.rcsb.org/download/{pdb_id}{ext}"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                if output_dir:
                    os.makedirs(output_dir, exist_ok=True)
                with open(file_path, 'wb') as f:
                    f.write(response.content)
                return file_path
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and fall through to the next format.
            print(f"Download error for {pdb_id}{ext}: {e}")
    return None
828
+ "\n",
829
def convert_cif_to_pdb(cif_path: str, output_dir: str = ".") -> str:
    """Convert an mmCIF file to PDB format with BioPython.

    Args:
        cif_path: Path to the input mmCIF file.
        output_dir: Directory the PDB file is written to.

    Returns:
        Path of the written file: the input's base name with its extension
        replaced by ".pdb", inside *output_dir*.
    """
    # splitext swaps only the final extension; str.replace('.cif', '.pdb')
    # would rewrite every ".cif" in the name and leave names with another
    # extension unchanged.
    stem = os.path.splitext(os.path.basename(cif_path))[0]
    pdb_path = os.path.join(output_dir, f"{stem}.pdb")

    structure = MMCIFParser(QUIET=True).get_structure('protein', cif_path)
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_path)
    return pdb_path
840
+ "\n",
841
def fetch_pdb(pdb_id):
    """Return the path to a PDB-format file for *pdb_id*, or None.

    The structure is obtained through fetch_structure(); when that yields an
    mmCIF file it is converted with convert_cif_to_pdb().

    Args:
        pdb_id: PDB identifier; surrounding whitespace is ignored.

    Returns:
        Path to a PDB file, or None if the ID is blank or nothing was fetched.
    """
    pdb_id = (pdb_id or "").strip()
    if not pdb_id:
        return None
    pdb_path = fetch_structure(pdb_id)
    if not pdb_path:
        return None
    if os.path.splitext(pdb_path)[1].lower() == '.cif':
        pdb_path = convert_cif_to_pdb(pdb_path)
    return pdb_path
849
+ "\n",
850
def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
    """Write a PDB holding only *chain_id*, with scores in the B-factor column.

    Args:
        input_pdb: Path to the source PDB file.
        chain_id: ID of the chain to keep; all other chains are dropped.
        residue_scores: Iterable of (residue_number, score) pairs.

    Returns:
        Path of the written file, "<input stem>_<chain_id>_scored.pdb".
    """
    # The structure is parsed fresh and used only here, so it is edited in
    # place; a deep copy would just double the work.
    structure = PDBParser(QUIET=True).get_structure('protein', input_pdb)
    scores_by_number = {resi: float(score) for resi, score in residue_scores}

    for model in structure:
        # Collect IDs first: detaching while iterating the model would skip chains.
        for unwanted in [chain.id for chain in model if chain.id != chain_id]:
            model.detach_child(unwanted)

        for chain in model:
            # Iterate the unpacked lists, as PDBIO.save() does, so every
            # alternate location / disordered residue is updated; assigning
            # to a DisorderedAtom wrapper would not reach the written atoms.
            for residue in chain.get_unpacked_list():
                # Scores exist only for amino-acid residues; skip waters and
                # ligands that merely share a residue number.
                if not is_aa(residue):
                    continue
                score = scores_by_number.get(residue.id[1])
                if score is None:
                    continue
                for atom in residue.get_unpacked_list():
                    atom.set_bfactor(score)

    output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(output_pdb)
    return output_pdb
882
+ "\n",
883
def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):
    """Return the mean CA position of the given residues, or None.

    Args:
        pdb_path: Path to the PDB file to parse.
        high_score_residues: Residue numbers to include.
        chain_id: Chain in which the residues are looked up.

    Returns:
        NumPy array with the mean CA coordinate over all matching residues
        (in every model of the file), or None when no CA atom matched.
    """
    structure = PDBParser(QUIET=True).get_structure('protein', pdb_path)
    wanted = set(high_score_residues)  # O(1) membership instead of a list scan

    coords = []
    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            for residue in chain:
                # The alpha carbon stands in for the whole residue.
                if residue.id[1] in wanted and 'CA' in residue:
                    coords.append(residue['CA'].coord)

    if not coords:
        return None
    return np.mean(coords, axis=0)
906
+ "\n",
907
def process_pdb(pdb_id_or_file, segment):
    """Score one chain of a structure and build the outputs shown in the UI.

    The per-residue scores are random placeholders (np.random.rand) that are
    min-max normalised. A text report and a chain-only PDB carrying the
    scores as B-factors are written to disk.

    Args:
        pdb_id_or_file: A PDB ID, or a path ending in ".pdb".
        segment: Chain ID to analyse.

    Returns:
        A 3-tuple (report text, viewer HTML, [report path, scored PDB path]).
        On failure the first element is an error message and the other two
        are None.
    """
    # Nothing usable was supplied (None, non-string or blank).
    if not isinstance(pdb_id_or_file, str) or not pdb_id_or_file.strip():
        return "Failed to fetch PDB file", None, None
    source = pdb_id_or_file.strip()

    # Determine if input is a PDB ID or file path
    if source.endswith('.pdb'):
        pdb_path = source
        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
    else:
        pdb_id = source
        pdb_path = fetch_pdb(pdb_id)

    if not pdb_path:
        return "Failed to fetch PDB file", None, None

    # Determine the file format and choose the appropriate parser
    _, ext = os.path.splitext(pdb_path)
    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)

    try:
        structure = parser.get_structure('protein', pdb_path)
    except Exception as e:  # reported to the user instead of crashing the UI
        return f"Error parsing structure file: {e}", None, None

    try:
        chain = structure[0][segment]
    except KeyError:
        return "Invalid Chain ID", None, None

    protein_residues = [res for res in chain if is_aa(res)]
    if not protein_residues:
        # e.g. a nucleic-acid or ligand-only chain: there is nothing to score.
        return f"No amino-acid residues found in chain {segment}", None, None

    sequence = "".join(seq1(res.resname) for res in protein_residues)
    sequence_id = [res.id[1] for res in protein_residues]

    # Generate random scores for residues
    scores = np.random.rand(len(sequence))
    normalized_scores = normalize_scores(scores)

    # Pair each residue number with its score
    residue_scores = list(zip(sequence_id, normalized_scores))
    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]

    # PyMOL hints; an empty "resi" list would be an invalid selection, so
    # fall back to PyMOL's "none" keyword in that case.
    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
    if high_score_residues:
        resi_expr = '+'.join(map(str, high_score_residues))
        pymol_selection = f"select high_score_residues, resi {resi_expr} and chain {segment}"
        pymol_center_cmd = (
            f"show spheres, resi {resi_expr} and chain {segment}"
            if geo_center is not None else ""
        )
    else:
        pymol_selection = "select high_score_residues, none"
        pymol_center_cmd = ""

    # Generate the result string
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
    result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
    result_str += "\n".join(
        f"{res.resname} {res.id[1]} {letter} {score:.2f}"
        for res, letter, score in zip(protein_residues, sequence, normalized_scores)
    )

    # Create prediction and scored PDB files
    prediction_file = f"{pdb_id}_predictions.txt"
    with open(prediction_file, "w", encoding="utf-8") as f:
        f.write(result_str)

    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)
    mol_vis = molecule(pdb_path, residue_scores, segment)

    # Construct PyMOL command suggestions
    pymol_commands = f"""
PyMOL Visualization Commands:
1. Load PDB: load {os.path.abspath(pdb_path)}
2. Select high-scoring residues: {pymol_selection}
3. Highlight high-scoring residues: show sticks, high_score_residues
{pymol_center_cmd}
"""

    return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
984
+ "\n",
985
+ "# molecule() function remains the same as in the previous script, \n",
986
+ "# but modify the visualization script to ensure cartoon is below stick representations\n",
987
+ "\n",
988
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
989
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
990
+ "\n",
991
+ " # Prepare high-scoring residues script if scores are provided\n",
992
+ " high_score_script = \"\"\n",
993
+ " if residue_scores is not None:\n",
994
+ " # Filter residues based on their scores\n",
995
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
996
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
997
+ " \n",
998
+ " high_score_script = \"\"\"\n",
999
+ " // Load the original model and apply white cartoon style\n",
1000
+ " let chainModel = viewer.addModel(pdb, \"pdb\");\n",
1001
+ " chainModel.setStyle(\n",
1002
+ " {\"chain\": \"%s\"}, \n",
1003
+ " {\"cartoon\": {\"color\": \"white\"}}\n",
1004
+ " );\n",
1005
+ "\n",
1006
+ " // Create a new model for high-scoring residues and apply red sticks style\n",
1007
+ " let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1008
+ " highScoreModel.setStyle(\n",
1009
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1010
+ " {\"stick\": {\"color\": \"red\"}}\n",
1011
+ " );\n",
1012
+ "\n",
1013
+ " // Create a new model for medium-scoring residues and apply orange sticks style\n",
1014
+ " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1015
+ " midScoreModel.setStyle(\n",
1016
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1017
+ " {\"stick\": {\"color\": \"orange\"}}\n",
1018
+ " );\n",
1019
+ " \"\"\" % (\n",
1020
+ " segment,\n",
1021
+ " segment,\n",
1022
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
1023
+ " segment,\n",
1024
+ " \", \".join(str(resi) for resi in mid_score_residues)\n",
1025
+ " )\n",
1026
+ " \n",
1027
+ " # Generate the full HTML content\n",
1028
+ " html_content = f\"\"\"\n",
1029
+ " <!DOCTYPE html>\n",
1030
+ " <html>\n",
1031
+ " <head> \n",
1032
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
1033
+ " <style>\n",
1034
+ " .mol-container {{\n",
1035
+ " width: 100%;\n",
1036
+ " height: 700px;\n",
1037
+ " position: relative;\n",
1038
+ " }}\n",
1039
+ " </style>\n",
1040
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
1041
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
1042
+ " </head>\n",
1043
+ " <body>\n",
1044
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
1045
+ " <script>\n",
1046
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
1047
+ " $(document).ready(function () {{\n",
1048
+ " let element = $(\"#container\");\n",
1049
+ " let config = {{ backgroundColor: \"white\" }};\n",
1050
+ " let viewer = $3Dmol.createViewer(element, config);\n",
1051
+ " \n",
1052
+ " {high_score_script}\n",
1053
+ " \n",
1054
+ " // Add hover functionality\n",
1055
+ " viewer.setHoverable(\n",
1056
+ " {{}}, \n",
1057
+ " true, \n",
1058
+ " function(atom, viewer, event, container) {{\n",
1059
+ " if (!atom.label) {{\n",
1060
+ " atom.label = viewer.addLabel(\n",
1061
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1062
+ " {{\n",
1063
+ " position: atom, \n",
1064
+ " backgroundColor: 'mintcream', \n",
1065
+ " fontColor: 'black',\n",
1066
+ " fontSize: 12,\n",
1067
+ " padding: 2\n",
1068
+ " }}\n",
1069
+ " );\n",
1070
+ " }}\n",
1071
+ " }},\n",
1072
+ " function(atom, viewer) {{\n",
1073
+ " if (atom.label) {{\n",
1074
+ " viewer.removeLabel(atom.label);\n",
1075
+ " delete atom.label;\n",
1076
+ " }}\n",
1077
+ " }}\n",
1078
+ " );\n",
1079
+ " \n",
1080
+ " viewer.zoomTo();\n",
1081
+ " viewer.render();\n",
1082
+ " viewer.zoom(0.8, 2000);\n",
1083
+ " }});\n",
1084
+ " </script>\n",
1085
+ " </body>\n",
1086
+ " </html>\n",
1087
+ " \"\"\"\n",
1088
+ " \n",
1089
+ " # Return the HTML content within an iframe safely encoded for special characters\n",
1090
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
1091
+ "\n",
1092
+ "\n",
1093
+ "# Gradio UI\n",
1094
+ "with gr.Blocks() as demo:\n",
1095
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1096
+ " \n",
1097
+ " with gr.Row():\n",
1098
+ " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1099
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
1100
+ "\n",
1101
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
1102
+ " {\n",
1103
+ " \"model\": 0,\n",
1104
+ " \"style\": \"cartoon\",\n",
1105
+ " \"color\": \"whiteCarbon\",\n",
1106
+ " \"residue_range\": \"\",\n",
1107
+ " \"around\": 0,\n",
1108
+ " \"byres\": False,\n",
1109
+ " }\n",
1110
+ " ])\n",
1111
+ "\n",
1112
+ " with gr.Row():\n",
1113
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1114
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1115
+ "\n",
1116
+ "\n",
1117
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1118
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
1119
+ " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
1120
+ " \n",
1121
+ " prediction_btn.click(\n",
1122
+ " process_pdb, \n",
1123
+ " inputs=[\n",
1124
+ " pdb_input, \n",
1125
+ " segment_input\n",
1126
+ " ], \n",
1127
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1128
+ " )\n",
1129
+ "\n",
1130
+ " visualize_btn.click(\n",
1131
+ " fetch_pdb, \n",
1132
+ " inputs=[pdb_input], \n",
1133
+ " outputs=molecule_output2\n",
1134
+ " )\n",
1135
+ "\n",
1136
+ " gr.Markdown(\"## Examples\")\n",
1137
+ " gr.Examples(\n",
1138
+ " examples=[\n",
1139
+ " [\"7RPZ\", \"A\"],\n",
1140
+ " [\"2IWI\", \"B\"],\n",
1141
+ " [\"2F6V\", \"A\"]\n",
1142
+ " ],\n",
1143
+ " inputs=[pdb_input, segment_input],\n",
1144
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1145
+ " )\n",
1146
+ "\n",
1147
+ "demo.launch(share=True)"
1148
+ ]
1149
+ },
1150
+ {
1151
+ "cell_type": "code",
1152
+ "execution_count": 38,
1153
+ "id": "514fad12-a31a-495f-af9e-04a18e11175e",
1154
+ "metadata": {},
1155
+ "outputs": [
1156
+ {
1157
+ "name": "stdout",
1158
+ "output_type": "stream",
1159
+ "text": [
1160
+ "* Running on local URL: http://127.0.0.1:7896\n",
1161
+ "* Running on public URL: https://387fb4706015321f92.gradio.live\n",
1162
+ "\n",
1163
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
1164
+ ]
1165
+ },
1166
+ {
1167
+ "data": {
1168
+ "text/html": [
1169
+ "<div><iframe src=\"https://387fb4706015321f92.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
1170
+ ],
1171
+ "text/plain": [
1172
+ "<IPython.core.display.HTML object>"
1173
+ ]
1174
+ },
1175
+ "metadata": {},
1176
+ "output_type": "display_data"
1177
+ },
1178
+ {
1179
+ "data": {
1180
+ "text/plain": []
1181
+ },
1182
+ "execution_count": 38,
1183
+ "metadata": {},
1184
+ "output_type": "execute_result"
1185
+ }
1186
+ ],
1187
+ "source": [
1188
+ "import os\n",
1189
+ "from datetime import datetime\n",
1190
+ "import gradio as gr\n",
1191
+ "import numpy as np\n",
1192
+ "import requests\n",
1193
+ "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
1194
+ "from Bio.PDB.Polypeptide import is_aa\n",
1195
+ "from Bio.SeqUtils import seq1\n",
1196
+ "from gradio_molecule3d import Molecule3D\n",
1197
+ "from typing import Optional, Tuple\n",
1198
+ "\n",
1199
+ "def normalize_scores(scores):\n",
1200
+ " min_score = np.min(scores)\n",
1201
+ " max_score = np.max(scores)\n",
1202
+ " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
1203
+ "\n",
1204
+ "def read_mol(pdb_path):\n",
1205
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
1206
+ " with open(pdb_path, 'r') as f:\n",
1207
+ " return f.read()\n",
1208
+ "\n",
1209
+ "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
1210
+ " \"\"\"\n",
1211
+ " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
1212
+ " If a structure file already exists locally, it uses that.\n",
1213
+ " \"\"\"\n",
1214
+ " file_path = download_structure(pdb_id, output_dir)\n",
1215
+ " if file_path:\n",
1216
+ " return file_path\n",
1217
+ " else:\n",
1218
+ " return None\n",
1219
+ "\n",
1220
+ "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
1221
+ " \"\"\"\n",
1222
+ " Attempt to download the structure file in CIF or PDB format.\n",
1223
+ " Returns the path to the downloaded file, or None if download fails.\n",
1224
+ " \"\"\"\n",
1225
+ " for ext in ['.cif', '.pdb']:\n",
1226
+ " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
1227
+ " if os.path.exists(file_path):\n",
1228
+ " return file_path\n",
1229
+ " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
1230
+ " try:\n",
1231
+ " response = requests.get(url, timeout=10)\n",
1232
+ " if response.status_code == 200:\n",
1233
+ " with open(file_path, 'wb') as f:\n",
1234
+ " f.write(response.content)\n",
1235
+ " return file_path\n",
1236
+ " except Exception as e:\n",
1237
+ " print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
1238
+ " return None\n",
1239
+ "\n",
1240
+ "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
1241
+ " \"\"\"\n",
1242
+ " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
1243
+ " \"\"\"\n",
1244
+ " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
1245
+ " parser = MMCIFParser(QUIET=True)\n",
1246
+ " structure = parser.get_structure('protein', cif_path)\n",
1247
+ " io = PDBIO()\n",
1248
+ " io.set_structure(structure)\n",
1249
+ " io.save(pdb_path)\n",
1250
+ " return pdb_path\n",
1251
+ "\n",
1252
+ "def fetch_pdb(pdb_id):\n",
1253
+ " pdb_path = fetch_structure(pdb_id)\n",
1254
+ " if not pdb_path:\n",
1255
+ " return None\n",
1256
+ " _, ext = os.path.splitext(pdb_path)\n",
1257
+ " if ext == '.cif':\n",
1258
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
1259
+ " return pdb_path\n",
1260
+ "\n",
1261
+ "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
1262
+ " \"\"\"\n",
1263
+ " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
1264
+ " \"\"\"\n",
1265
+ " # Read the original PDB file\n",
1266
+ " parser = PDBParser(QUIET=True)\n",
1267
+ " structure = parser.get_structure('protein', input_pdb)\n",
1268
+ " \n",
1269
+ " # Prepare a new structure with only the specified chain\n",
1270
+ " new_structure = structure.copy()\n",
1271
+ " for model in new_structure:\n",
1272
+ " # Remove all chains except the specified one\n",
1273
+ " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
1274
+ " for chain in chains_to_remove:\n",
1275
+ " model.detach_child(chain.id)\n",
1276
+ " \n",
1277
+ " # Create a modified PDB with scores in B-factor\n",
1278
+ " scores_dict = {resi: score for resi, score in residue_scores}\n",
1279
+ " for model in new_structure:\n",
1280
+ " for chain in model:\n",
1281
+ " for residue in chain:\n",
1282
+ " if residue.id[1] in scores_dict:\n",
1283
+ " for atom in residue:\n",
1284
+ " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n",
1285
+ " \n",
1286
+ " # Save the modified structure\n",
1287
+ " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
1288
+ " io = PDBIO()\n",
1289
+ " io.set_structure(new_structure)\n",
1290
+ " io.save(output_pdb)\n",
1291
+ " \n",
1292
+ " return output_pdb\n",
1293
+ "\n",
1294
+ "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
1295
+ " \"\"\"\n",
1296
+ " Calculate the geometric center of high-scoring residues\n",
1297
+ " \"\"\"\n",
1298
+ " parser = PDBParser(QUIET=True)\n",
1299
+ " structure = parser.get_structure('protein', pdb_path)\n",
1300
+ " \n",
1301
+ " # Collect coordinates of CA atoms from high-scoring residues\n",
1302
+ " coords = []\n",
1303
+ " for model in structure:\n",
1304
+ " for chain in model:\n",
1305
+ " if chain.id == chain_id:\n",
1306
+ " for residue in chain:\n",
1307
+ " if residue.id[1] in high_score_residues:\n",
1308
+ " if 'CA' in residue: # Use alpha carbon as representative\n",
1309
+ " ca_atom = residue['CA']\n",
1310
+ " coords.append(ca_atom.coord)\n",
1311
+ " \n",
1312
+ " # Calculate geometric center\n",
1313
+ " if coords:\n",
1314
+ " center = np.mean(coords, axis=0)\n",
1315
+ " return center\n",
1316
+ " return None\n",
1317
+ "\n",
1318
+ "def process_pdb(pdb_id_or_file, segment):\n",
1319
+ " # Determine if input is a PDB ID or file path\n",
1320
+ " if pdb_id_or_file.endswith('.pdb'):\n",
1321
+ " pdb_path = pdb_id_or_file\n",
1322
+ " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
1323
+ " else:\n",
1324
+ " pdb_id = pdb_id_or_file\n",
1325
+ " pdb_path = fetch_pdb(pdb_id)\n",
1326
+ " \n",
1327
+ " if not pdb_path:\n",
1328
+ " return \"Failed to fetch PDB file\", None, None\n",
1329
+ " \n",
1330
+ " # Determine the file format and choose the appropriate parser\n",
1331
+ " _, ext = os.path.splitext(pdb_path)\n",
1332
+ " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
1333
+ " \n",
1334
+ " try:\n",
1335
+ " # Parse the structure file\n",
1336
+ " structure = parser.get_structure('protein', pdb_path)\n",
1337
+ " except Exception as e:\n",
1338
+ " return f\"Error parsing structure file: {e}\", None, None\n",
1339
+ " \n",
1340
+ " # Extract the specified chain\n",
1341
+ " try:\n",
1342
+ " chain = structure[0][segment]\n",
1343
+ " except KeyError:\n",
1344
+ " return \"Invalid Chain ID\", None, None\n",
1345
+ " \n",
1346
+ " protein_residues = [res for res in chain if is_aa(res)]\n",
1347
+ " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
1348
+ " sequence_id = [res.id[1] for res in protein_residues]\n",
1349
+ " \n",
1350
+ " # Generate random scores for residues\n",
1351
+ " scores = np.random.rand(len(sequence))\n",
1352
+ " normalized_scores = normalize_scores(scores)\n",
1353
+ " \n",
1354
+ " # Zip residues with scores to track the residue ID and score\n",
1355
+ " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
1356
+ "\n",
1357
+ " # Identify high and mid scoring residues\n",
1358
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1359
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1360
+ "\n",
1361
+ " # Calculate geometric center of high-scoring residues\n",
1362
+ " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
1363
+ " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
1364
+ " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
1365
+ "\n",
1366
+ " # Generate the result string\n",
1367
+ " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
1368
+ " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
1369
+ " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
1370
+ " result_str += \"\\n\".join([\n",
1371
+ " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1372
+ " for i, res in enumerate(protein_residues)])\n",
1373
+ " \n",
1374
+ " # Create prediction and scored PDB files\n",
1375
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
1376
+ " with open(prediction_file, \"w\") as f:\n",
1377
+ " f.write(result_str)\n",
1378
+ "\n",
1379
+ " # Create chain-specific PDB with scores in B-factor\n",
1380
+ " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
1381
+ "\n",
1382
+ " # Molecule visualization with updated script\n",
1383
+ " mol_vis = molecule(pdb_path, residue_scores, segment)\n",
1384
+ "\n",
1385
+ " # Construct PyMOL command suggestions\n",
1386
+ " pymol_commands = f\"\"\"\n",
1387
+ "PyMOL Visualization Commands:\n",
1388
+ "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
1389
+ "2. Select high-scoring residues: {pymol_selection}\n",
1390
+ "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
1391
+ "{pymol_center_cmd}\n",
1392
+ "\"\"\"\n",
1393
+ " \n",
1394
+ " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
1395
+ "\n",
1396
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
1397
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
1398
+ "\n",
1399
+ " # Prepare high-scoring residues script if scores are provided\n",
1400
+ " high_score_script = \"\"\n",
1401
+ " if residue_scores is not None:\n",
1402
+ " # Filter residues based on their scores\n",
1403
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1404
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1405
+ " \n",
1406
+ " high_score_script = \"\"\"\n",
1407
+ " // Load the original model and apply white cartoon style\n",
1408
+ " let chainModel = viewer.addModel(pdb, \"pdb\");\n",
1409
+ " chainModel.setStyle({}, {});\n",
1410
+ " chainModel.setStyle(\n",
1411
+ " {\"chain\": \"%s\"}, \n",
1412
+ " {\"cartoon\": {\"color\": \"white\"}}\n",
1413
+ " );\n",
1414
+ "\n",
1415
+ " // Create a new model for high-scoring residues and apply red sticks style\n",
1416
+ " let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1417
+ " highScoreModel.setStyle({}, {});\n",
1418
+ " highScoreModel.setStyle(\n",
1419
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1420
+ " {\"stick\": {\"color\": \"red\"}}\n",
1421
+ " );\n",
1422
+ "\n",
1423
+ " // Create a new model for medium-scoring residues and apply orange sticks style\n",
1424
+ " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1425
+ " midScoreModel.setStyle({}, {});\n",
1426
+ " midScoreModel.setStyle(\n",
1427
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1428
+ " {\"stick\": {\"color\": \"orange\"}}\n",
1429
+ " );\n",
1430
+ " \"\"\" % (\n",
1431
+ " segment,\n",
1432
+ " segment,\n",
1433
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
1434
+ " segment,\n",
1435
+ " \", \".join(str(resi) for resi in mid_score_residues)\n",
1436
+ " )\n",
1437
+ " \n",
1438
+ " # Generate the full HTML content\n",
1439
+ " html_content = f\"\"\"\n",
1440
+ " <!DOCTYPE html>\n",
1441
+ " <html>\n",
1442
+ " <head> \n",
1443
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
1444
+ " <style>\n",
1445
+ " .mol-container {{\n",
1446
+ " width: 100%;\n",
1447
+ " height: 700px;\n",
1448
+ " position: relative;\n",
1449
+ " }}\n",
1450
+ " </style>\n",
1451
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
1452
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
1453
+ " </head>\n",
1454
+ " <body>\n",
1455
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
1456
+ " <script>\n",
1457
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
1458
+ " $(document).ready(function () {{\n",
1459
+ " let element = $(\"#container\");\n",
1460
+ " let config = {{ backgroundColor: \"white\" }};\n",
1461
+ " let viewer = $3Dmol.createViewer(element, config);\n",
1462
+ " \n",
1463
+ " {high_score_script}\n",
1464
+ " \n",
1465
+ " // Add hover functionality\n",
1466
+ " viewer.setHoverable(\n",
1467
+ " {{}}, \n",
1468
+ " true, \n",
1469
+ " function(atom, viewer, event, container) {{\n",
1470
+ " if (!atom.label) {{\n",
1471
+ " atom.label = viewer.addLabel(\n",
1472
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1473
+ " {{\n",
1474
+ " position: atom, \n",
1475
+ " backgroundColor: 'mintcream', \n",
1476
+ " fontColor: 'black',\n",
1477
+ " fontSize: 12,\n",
1478
+ " padding: 2\n",
1479
+ " }}\n",
1480
+ " );\n",
1481
+ " }}\n",
1482
+ " }},\n",
1483
+ " function(atom, viewer) {{\n",
1484
+ " if (atom.label) {{\n",
1485
+ " viewer.removeLabel(atom.label);\n",
1486
+ " delete atom.label;\n",
1487
+ " }}\n",
1488
+ " }}\n",
1489
+ " );\n",
1490
+ " \n",
1491
+ " viewer.zoomTo();\n",
1492
+ " viewer.render();\n",
1493
+ " viewer.zoom(0.8, 2000);\n",
1494
+ " }});\n",
1495
+ " </script>\n",
1496
+ " </body>\n",
1497
+ " </html>\n",
1498
+ " \"\"\"\n",
1499
+ " \n",
1500
+ " # Return the HTML content within an iframe safely encoded for special characters\n",
1501
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
1502
+ "\n",
1503
+ "\n",
1504
+ "# Gradio UI\n",
1505
+ "with gr.Blocks() as demo:\n",
1506
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1507
+ " \n",
1508
+ " with gr.Row():\n",
1509
+ " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1510
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
1511
+ "\n",
1512
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
1513
+ " {\n",
1514
+ " \"model\": 0,\n",
1515
+ " \"style\": \"cartoon\",\n",
1516
+ " \"color\": \"whiteCarbon\",\n",
1517
+ " \"residue_range\": \"\",\n",
1518
+ " \"around\": 0,\n",
1519
+ " \"byres\": False,\n",
1520
+ " }\n",
1521
+ " ])\n",
1522
+ "\n",
1523
+ " with gr.Row():\n",
1524
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1525
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1526
+ "\n",
1527
+ "\n",
1528
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1529
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
1530
+ " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
1531
+ " \n",
1532
+ " prediction_btn.click(\n",
1533
+ " process_pdb, \n",
1534
+ " inputs=[\n",
1535
+ " pdb_input, \n",
1536
+ " segment_input\n",
1537
+ " ], \n",
1538
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1539
+ " )\n",
1540
+ "\n",
1541
+ " visualize_btn.click(\n",
1542
+ " fetch_pdb, \n",
1543
+ " inputs=[pdb_input], \n",
1544
+ " outputs=molecule_output2\n",
1545
+ " )\n",
1546
+ "\n",
1547
+ " gr.Markdown(\"## Examples\")\n",
1548
+ " gr.Examples(\n",
1549
+ " examples=[\n",
1550
+ " [\"7RPZ\", \"A\"],\n",
1551
+ " [\"2IWI\", \"B\"],\n",
1552
+ " [\"2F6V\", \"A\"]\n",
1553
+ " ],\n",
1554
+ " inputs=[pdb_input, segment_input],\n",
1555
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1556
+ " )\n",
1557
+ "\n",
1558
+ "demo.launch(share=True)"
1559
+ ]
1560
+ },
1561
+ {
1562
+ "cell_type": "code",
1563
+ "execution_count": null,
1564
+ "id": "2f960cc2-8330-40f1-b54d-693ce922fa74",
1565
+ "metadata": {},
1566
+ "outputs": [],
1567
+ "source": []
1568
+ },
1569
+ {
1570
+ "cell_type": "code",
1571
+ "execution_count": null,
1572
+ "id": "cec41eef-c414-440f-a0ea-63fc8d3acf0b",
1573
+ "metadata": {},
1574
+ "outputs": [],
1575
+ "source": []
1576
+ }
1577
+ ],
1578
+ "metadata": {
1579
+ "kernelspec": {
1580
+ "display_name": "Python (LLM)",
1581
+ "language": "python",
1582
+ "name": "llm"
1583
+ },
1584
+ "language_info": {
1585
+ "codemirror_mode": {
1586
+ "name": "ipython",
1587
+ "version": 3
1588
+ },
1589
+ "file_extension": ".py",
1590
+ "mimetype": "text/x-python",
1591
+ "name": "python",
1592
+ "nbconvert_exporter": "python",
1593
+ "pygments_lexer": "ipython3",
1594
+ "version": "3.12.7"
1595
+ }
1596
+ },
1597
+ "nbformat": 4,
1598
+ "nbformat_minor": 5
1599
+ }
2IWI.cif ADDED
The diff for this file is too large to render. See raw diff
 
2IWI.pdb CHANGED
The diff for this file is too large to render. See raw diff
 
2IWI_predictions.txt CHANGED
@@ -1,244 +1,249 @@
1
- Y 32 0.77
2
- R 33 0.71
3
- L 34 0.07
4
- G 35 0.04
5
- P 36 0.28
6
- L 37 0.39
7
- L 38 0.81
8
- G 39 0.67
9
- K 40 0.56
10
- G 41 0.16
11
- G 42 0.52
12
- F 43 0.59
13
- G 44 0.64
14
- T 45 0.62
15
- V 46 0.11
16
- F 47 0.07
17
- A 48 0.35
18
- G 49 0.94
19
- H 50 0.23
20
- R 51 0.78
21
- L 52 0.08
22
- T 53 0.65
23
- D 54 0.45
24
- R 55 0.12
25
- L 56 0.08
26
- Q 57 0.67
27
- V 58 0.44
28
- A 59 0.48
29
- I 60 0.31
30
- K 61 0.01
31
- V 62 0.57
32
- I 63 0.40
33
- P 64 0.43
34
- R 65 0.56
35
- N 66 0.36
36
- R 67 0.80
37
- V 68 0.68
38
- L 69 0.33
39
- V 78 0.63
40
- T 79 0.11
41
- C 80 0.56
42
- P 81 0.01
43
- L 82 0.93
44
- E 83 0.19
45
- V 84 0.26
46
- A 85 0.26
47
- L 86 0.68
48
- L 87 0.44
49
- W 88 0.91
50
- K 89 0.65
51
- V 90 0.38
52
- G 91 0.30
53
- A 92 0.18
54
- G 93 0.56
55
- G 94 0.90
56
- G 95 0.89
57
- H 96 0.36
58
- P 97 0.23
59
- G 98 0.04
60
- V 99 0.65
61
- I 100 0.83
62
- R 101 0.35
63
- L 102 0.47
64
- L 103 0.53
65
- D 104 0.26
66
- W 105 0.12
67
- F 106 0.14
68
- F 112 0.22
69
- M 113 0.04
70
- L 114 0.84
71
- V 115 0.64
72
- L 116 0.82
73
- E 117 0.42
74
- R 118 0.79
75
- P 119 0.18
76
- L 120 0.68
77
- P 121 0.35
78
- A 122 0.40
79
- Q 123 0.48
80
- D 124 0.02
81
- L 125 0.66
82
- F 126 0.51
83
- D 127 0.21
84
- Y 128 0.33
85
- I 129 0.37
86
- T 130 0.16
87
- E 131 0.35
88
- K 132 0.10
89
- G 133 0.39
90
- P 134 0.64
91
- L 135 0.56
92
- G 136 0.59
93
- E 137 0.85
94
- G 138 0.21
95
- P 139 0.27
96
- S 140 0.27
97
- R 141 0.12
98
- C 142 0.43
99
- F 143 0.32
100
- F 144 0.80
101
- G 145 0.49
102
- Q 146 0.81
103
- V 147 0.06
104
- V 148 0.40
105
- A 149 0.45
106
- A 150 0.77
107
- I 151 0.53
108
- Q 152 0.34
109
- H 153 0.33
110
- C 154 0.89
111
- H 155 0.51
112
- S 156 0.48
113
- R 157 0.21
114
- G 158 0.64
115
- V 159 0.00
116
- V 160 0.85
117
- H 161 0.55
118
- R 162 0.74
119
- D 163 0.41
120
- I 164 0.57
121
- K 165 0.63
122
- D 166 0.77
123
- E 167 0.06
124
- N 168 0.67
125
- I 169 0.65
126
- L 170 0.79
127
- I 171 0.42
128
- D 172 0.39
129
- L 173 0.49
130
- R 174 0.63
131
- R 175 0.39
132
- G 176 0.72
133
- C 177 0.88
134
- A 178 0.79
135
- K 179 0.76
136
- L 180 0.81
137
- I 181 0.30
138
- D 182 0.22
139
- F 183 0.31
140
- G 184 0.85
141
- S 185 0.67
142
- G 186 0.25
143
- A 187 0.50
144
- L 188 0.96
145
- L 189 0.26
146
- H 190 0.13
147
- D 191 0.29
148
- E 192 0.02
149
- P 193 0.65
150
- Y 194 0.32
151
- T 195 0.41
152
- D 196 0.82
153
- F 197 0.34
154
- D 198 0.15
155
- G 199 0.20
156
- T 200 0.46
157
- R 201 0.22
158
- V 202 0.26
159
- Y 203 0.29
160
- S 204 0.51
161
- P 205 0.70
162
- P 206 0.14
163
- E 207 0.89
164
- W 208 0.09
165
- I 209 0.54
166
- S 210 0.16
167
- R 211 0.69
168
- H 212 0.63
169
- Q 213 0.06
170
- Y 214 0.02
171
- H 215 0.17
172
- A 216 0.23
173
- L 217 0.65
174
- P 218 0.13
175
- A 219 0.91
176
- T 220 0.97
177
- V 221 0.77
178
- W 222 0.40
179
- S 223 0.91
180
- L 224 1.00
181
- G 225 0.97
182
- I 226 0.24
183
- L 227 0.44
184
- L 228 0.19
185
- Y 229 0.06
186
- D 230 0.32
187
- M 231 0.93
188
- V 232 0.35
189
- C 233 0.79
190
- G 234 0.50
191
- D 235 0.49
192
- I 236 0.10
193
- P 237 0.49
194
- F 238 0.12
195
- E 239 0.47
196
- R 240 0.40
197
- D 241 0.63
198
- Q 242 1.00
199
- E 243 0.55
200
- I 244 0.78
201
- L 245 0.92
202
- E 246 0.29
203
- A 247 0.37
204
- E 248 0.78
205
- L 249 0.54
206
- H 250 0.64
207
- F 251 0.30
208
- P 252 0.25
209
- A 253 0.01
210
- H 254 0.51
211
- V 255 0.81
212
- S 256 0.93
213
- P 257 0.79
214
- D 258 0.74
215
- C 259 0.31
216
- C 260 0.51
217
- A 261 0.91
218
- L 262 0.04
219
- I 263 0.21
220
- R 264 0.07
221
- R 265 0.34
222
- C 266 0.93
223
- L 267 0.80
224
- A 268 0.75
225
- P 269 0.59
226
- K 270 0.62
227
- P 271 0.18
228
- S 272 0.32
229
- S 273 0.54
230
- R 274 0.14
231
- P 275 0.38
232
- S 276 0.29
233
- L 277 0.97
234
- E 278 0.72
235
- E 279 0.05
236
- I 280 0.26
237
- L 281 0.24
238
- L 282 0.42
239
- D 283 0.84
240
- P 284 0.36
241
- W 285 0.79
242
- M 286 0.05
243
- Q 287 0.81
244
- T 288 0.39
 
 
 
 
 
 
1
+ GLY 22 G 0.18
2
+ LYS 23 K 0.51
3
+ ASP 24 D 0.12
4
+ ARG 25 R 0.25
5
+ GLU 26 E 0.08
6
+ ALA 27 A 0.82
7
+ PHE 28 F 0.65
8
+ GLU 29 E 0.65
9
+ ALA 30 A 0.22
10
+ GLU 31 E 0.49
11
+ TYR 32 Y 0.57
12
+ ARG 33 R 0.56
13
+ LEU 34 L 0.83
14
+ GLY 35 G 0.42
15
+ PRO 36 P 0.97
16
+ LEU 37 L 0.65
17
+ LEU 38 L 0.08
18
+ GLY 39 G 0.05
19
+ LYS 40 K 0.55
20
+ GLY 41 G 0.38
21
+ GLY 42 G 0.45
22
+ PHE 43 F 0.92
23
+ GLY 44 G 0.00
24
+ THR 45 T 0.76
25
+ VAL 46 V 0.63
26
+ PHE 47 F 0.97
27
+ ALA 48 A 0.57
28
+ GLY 49 G 0.94
29
+ HIS 50 H 0.40
30
+ ARG 51 R 0.27
31
+ LEU 52 L 0.65
32
+ THR 53 T 0.84
33
+ ASP 54 D 0.85
34
+ ARG 55 R 0.46
35
+ LEU 56 L 0.87
36
+ GLN 57 Q 0.76
37
+ VAL 58 V 0.22
38
+ ALA 59 A 0.65
39
+ ILE 60 I 0.87
40
+ LYS 61 K 0.69
41
+ VAL 62 V 0.76
42
+ ILE 63 I 0.70
43
+ PRO 64 P 0.04
44
+ ARG 65 R 0.20
45
+ THR 79 T 0.80
46
+ CYS 80 C 0.82
47
+ PRO 81 P 0.72
48
+ LEU 82 L 0.17
49
+ GLU 83 E 0.70
50
+ VAL 84 V 0.21
51
+ ALA 85 A 0.15
52
+ LEU 86 L 0.28
53
+ LEU 87 L 0.03
54
+ TRP 88 W 0.18
55
+ LYS 89 K 0.01
56
+ VAL 90 V 0.43
57
+ GLY 91 G 0.25
58
+ ALA 92 A 0.65
59
+ GLY 93 G 0.00
60
+ GLY 94 G 0.52
61
+ GLY 95 G 0.22
62
+ HIS 96 H 0.03
63
+ PRO 97 P 0.57
64
+ GLY 98 G 0.32
65
+ VAL 99 V 0.89
66
+ ILE 100 I 0.14
67
+ ARG 101 R 0.66
68
+ LEU 102 L 0.18
69
+ LEU 103 L 0.30
70
+ ASP 104 D 0.36
71
+ TRP 105 W 0.83
72
+ PHE 106 F 0.77
73
+ GLU 107 E 0.95
74
+ PHE 112 F 0.04
75
+ MET 113 M 0.05
76
+ LEU 114 L 0.32
77
+ VAL 115 V 1.00
78
+ LEU 116 L 0.43
79
+ GLU 117 E 0.76
80
+ ARG 118 R 0.65
81
+ PRO 119 P 0.28
82
+ LEU 120 L 0.74
83
+ PRO 121 P 0.69
84
+ ALA 122 A 0.89
85
+ GLN 123 Q 0.68
86
+ ASP 124 D 0.67
87
+ LEU 125 L 0.89
88
+ PHE 126 F 0.33
89
+ ASP 127 D 0.05
90
+ TYR 128 Y 0.59
91
+ ILE 129 I 0.19
92
+ THR 130 T 0.88
93
+ GLU 131 E 0.24
94
+ LYS 132 K 0.04
95
+ GLY 133 G 0.99
96
+ PRO 134 P 0.43
97
+ LEU 135 L 0.31
98
+ GLY 136 G 0.83
99
+ GLU 137 E 0.12
100
+ GLY 138 G 0.02
101
+ PRO 139 P 0.71
102
+ SER 140 S 0.70
103
+ ARG 141 R 0.63
104
+ CYS 142 C 0.70
105
+ PHE 143 F 0.92
106
+ PHE 144 F 0.02
107
+ GLY 145 G 0.72
108
+ GLN 146 Q 0.03
109
+ VAL 147 V 0.70
110
+ VAL 148 V 0.34
111
+ ALA 149 A 0.95
112
+ ALA 150 A 0.39
113
+ ILE 151 I 0.21
114
+ GLN 152 Q 0.86
115
+ HIS 153 H 0.11
116
+ CYS 154 C 0.30
117
+ HIS 155 H 0.12
118
+ SER 156 S 0.55
119
+ ARG 157 R 0.20
120
+ GLY 158 G 0.32
121
+ VAL 159 V 0.80
122
+ VAL 160 V 0.43
123
+ HIS 161 H 0.99
124
+ ARG 162 R 0.13
125
+ ASP 163 D 0.73
126
+ ILE 164 I 0.70
127
+ LYS 165 K 0.88
128
+ ASP 166 D 0.56
129
+ GLU 167 E 0.61
130
+ ASN 168 N 0.01
131
+ ILE 169 I 0.48
132
+ LEU 170 L 0.18
133
+ ILE 171 I 0.28
134
+ ASP 172 D 0.79
135
+ LEU 173 L 0.33
136
+ ARG 174 R 0.31
137
+ ARG 175 R 0.39
138
+ GLY 176 G 0.19
139
+ CYS 177 C 0.57
140
+ ALA 178 A 0.99
141
+ LYS 179 K 0.47
142
+ LEU 180 L 0.02
143
+ ILE 181 I 0.81
144
+ ASP 182 D 0.59
145
+ PHE 183 F 0.74
146
+ GLY 184 G 0.43
147
+ SER 185 S 0.90
148
+ GLY 186 G 0.87
149
+ ALA 187 A 0.39
150
+ LEU 188 L 0.43
151
+ LEU 189 L 0.84
152
+ HIS 190 H 0.91
153
+ ASP 191 D 0.45
154
+ GLU 192 E 0.00
155
+ PRO 193 P 0.86
156
+ TYR 194 Y 0.11
157
+ THR 195 T 0.54
158
+ ASP 196 D 0.70
159
+ PHE 197 F 0.62
160
+ ASP 198 D 0.31
161
+ GLY 199 G 0.41
162
+ THR 200 T 0.85
163
+ ARG 201 R 0.18
164
+ VAL 202 V 0.10
165
+ TYR 203 Y 0.22
166
+ SER 204 S 0.31
167
+ PRO 205 P 0.41
168
+ PRO 206 P 0.87
169
+ GLU 207 E 0.77
170
+ TRP 208 W 0.51
171
+ ILE 209 I 0.18
172
+ SER 210 S 0.03
173
+ ARG 211 R 0.41
174
+ HIS 212 H 0.83
175
+ GLN 213 Q 0.30
176
+ TYR 214 Y 0.38
177
+ HIS 215 H 0.28
178
+ ALA 216 A 0.51
179
+ LEU 217 L 0.61
180
+ PRO 218 P 0.77
181
+ ALA 219 A 0.79
182
+ THR 220 T 0.32
183
+ VAL 221 V 0.35
184
+ TRP 222 W 0.44
185
+ SER 223 S 0.35
186
+ LEU 224 L 0.67
187
+ GLY 225 G 0.21
188
+ ILE 226 I 0.88
189
+ LEU 227 L 0.38
190
+ LEU 228 L 0.27
191
+ TYR 229 Y 0.53
192
+ ASP 230 D 0.36
193
+ MET 231 M 0.76
194
+ VAL 232 V 0.59
195
+ CYS 233 C 0.44
196
+ GLY 234 G 0.88
197
+ ASP 235 D 0.54
198
+ ILE 236 I 0.63
199
+ PRO 237 P 0.41
200
+ PHE 238 F 0.84
201
+ GLU 239 E 0.66
202
+ ARG 240 R 0.20
203
+ ASP 241 D 0.08
204
+ GLN 242 Q 0.23
205
+ GLU 243 E 0.31
206
+ ILE 244 I 0.17
207
+ LEU 245 L 0.58
208
+ GLU 246 E 0.76
209
+ ALA 247 A 0.82
210
+ GLU 248 E 0.39
211
+ LEU 249 L 0.53
212
+ HIS 250 H 0.67
213
+ PHE 251 F 0.36
214
+ PRO 252 P 0.16
215
+ ALA 253 A 0.08
216
+ HIS 254 H 0.53
217
+ VAL 255 V 0.39
218
+ SER 256 S 0.24
219
+ PRO 257 P 0.06
220
+ ASP 258 D 0.79
221
+ CYS 259 C 0.54
222
+ CYS 260 C 0.46
223
+ ALA 261 A 0.29
224
+ LEU 262 L 0.60
225
+ ILE 263 I 0.33
226
+ ARG 264 R 0.56
227
+ ARG 265 R 0.95
228
+ CYS 266 C 0.63
229
+ LEU 267 L 0.83
230
+ ALA 268 A 0.22
231
+ PRO 269 P 0.18
232
+ LYS 270 K 0.71
233
+ PRO 271 P 0.91
234
+ SER 272 S 0.84
235
+ SER 273 S 0.62
236
+ ARG 274 R 0.22
237
+ PRO 275 P 0.34
238
+ SER 276 S 0.74
239
+ LEU 277 L 0.41
240
+ GLU 278 E 0.78
241
+ GLU 279 E 0.76
242
+ ILE 280 I 0.40
243
+ LEU 281 L 0.27
244
+ LEU 282 L 0.23
245
+ ASP 283 D 0.65
246
+ PRO 284 P 0.45
247
+ TRP 285 W 0.72
248
+ MET 286 M 0.57
249
+ GLN 287 Q 0.29
4BDU.cif ADDED
The diff for this file is too large to render. See raw diff
 
4BDU.pdb ADDED
The diff for this file is too large to render. See raw diff
 
4BDU_A_scored.pdb ADDED
The diff for this file is too large to render. See raw diff
 
4BDU_C_scored.pdb ADDED
The diff for this file is too large to render. See raw diff
 
4BDU_predictions.txt ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Prediction for PDB: 4BDU, Chain: A
2
+ Date: 2024-12-11 16:57:50
3
+
4
+ Columns: Residue Name, Residue Number, One-letter Code, Normalized Score
5
+
6
+ SER 2 S 0.05
7
+ LYS 3 K 0.39
8
+ GLY 4 G 0.24
9
+ GLU 5 E 0.26
10
+ GLU 6 E 0.35
11
+ LEU 7 L 0.45
12
+ PHE 8 F 0.82
13
+ THR 9 T 0.32
14
+ GLY 10 G 0.73
15
+ VAL 11 V 0.42
16
+ VAL 12 V 0.33
17
+ PRO 13 P 0.96
18
+ ILE 14 I 0.68
19
+ LEU 15 L 0.71
20
+ VAL 16 V 0.84
21
+ GLU 17 E 0.26
22
+ LEU 18 L 0.54
23
+ ASP 19 D 0.46
24
+ GLY 20 G 0.12
25
+ ASP 21 D 0.57
26
+ VAL 22 V 0.32
27
+ ASN 23 N 0.18
28
+ GLY 24 G 0.48
29
+ HIS 25 H 0.95
30
+ LYS 26 K 0.88
31
+ PHE 27 F 0.13
32
+ SER 28 S 0.12
33
+ VAL 29 V 0.58
34
+ SER 30 S 0.19
35
+ GLY 31 G 0.09
36
+ GLU 32 E 0.17
37
+ GLY 33 G 0.60
38
+ GLU 34 E 0.92
39
+ GLY 35 G 0.48
40
+ ASP 36 D 0.35
41
+ ALA 37 A 0.72
42
+ THR 38 T 0.47
43
+ TYR 39 Y 0.11
44
+ GLY 40 G 0.57
45
+ LYS 41 K 0.86
46
+ LEU 42 L 0.42
47
+ THR 43 T 0.98
48
+ LEU 44 L 0.27
49
+ LYS 45 K 0.05
50
+ PHE 46 F 0.54
51
+ ILE 47 I 0.25
52
+ CYS 48 C 0.73
53
+ THR 49 T 0.44
54
+ THR 50 T 0.85
55
+ GLY 51 G 0.17
56
+ LYS 52 K 0.72
57
+ LEU 53 L 0.03
58
+ PRO 54 P 0.26
59
+ VAL 55 V 0.64
60
+ PRO 56 P 0.88
61
+ TRP 57 W 0.84
62
+ PRO 58 P 0.71
63
+ THR 59 T 0.41
64
+ LEU 60 L 0.18
65
+ VAL 61 V 0.32
66
+ THR 62 T 0.87
67
+ THR 63 T 0.87
68
+ PHE 64 F 1.00
69
+ VAL 68 V 0.50
70
+ GLN 69 Q 0.10
71
+ CYS 70 C 0.71
72
+ PHE 71 F 0.47
73
+ SER 72 S 0.46
74
+ ARG 73 R 0.99
75
+ TYR 74 Y 0.40
76
+ PRO 75 P 0.78
77
+ ASP 76 D 0.42
78
+ HIS 77 H 0.93
79
+ MET 78 M 0.47
80
+ LYS 79 K 0.51
81
+ GLN 80 Q 0.85
82
+ HIS 81 H 0.11
83
+ ASP 82 D 0.87
84
+ PHE 83 F 0.13
85
+ PHE 84 F 0.56
86
+ LYS 85 K 0.44
87
+ SER 86 S 0.44
88
+ ALA 87 A 0.20
89
+ MET 88 M 0.33
90
+ PRO 89 P 0.77
91
+ GLU 90 E 0.32
92
+ GLY 91 G 0.80
93
+ TYR 92 Y 0.52
94
+ VAL 93 V 0.46
95
+ GLN 94 Q 0.26
96
+ GLU 95 E 0.03
97
+ ARG 96 R 0.99
98
+ THR 97 T 0.72
99
+ ILE 98 I 0.38
100
+ PHE 99 F 0.63
101
+ PHE 100 F 0.03
102
+ LYS 101 K 0.10
103
+ ASP 102 D 0.52
104
+ ASP 103 D 0.41
105
+ GLY 104 G 0.91
106
+ ASN 105 N 0.17
107
+ TYR 106 Y 0.75
108
+ LYS 107 K 0.07
109
+ THR 108 T 0.78
110
+ ARG 109 R 0.21
111
+ ALA 110 A 0.93
112
+ GLU 111 E 0.34
113
+ VAL 112 V 0.06
114
+ LYS 113 K 0.92
115
+ PHE 114 F 0.43
116
+ GLU 115 E 0.22
117
+ GLY 116 G 0.67
118
+ ASP 117 D 0.54
119
+ THR 118 T 0.18
120
+ LEU 119 L 0.33
121
+ VAL 120 V 0.52
122
+ ASN 121 N 0.23
123
+ ARG 122 R 0.18
124
+ ILE 123 I 0.52
125
+ GLU 124 E 0.85
126
+ LEU 125 L 0.66
127
+ LYS 126 K 0.69
128
+ GLY 127 G 0.46
129
+ ILE 128 I 0.48
130
+ ASP 129 D 0.55
131
+ PHE 130 F 0.90
132
+ LYS 131 K 1.00
133
+ GLU 132 E 0.98
134
+ ASP 133 D 0.41
135
+ GLY 134 G 0.78
136
+ ASN 135 N 0.12
137
+ ILE 136 I 0.06
138
+ LEU 137 L 0.80
139
+ GLY 138 G 0.70
140
+ HIS 139 H 0.52
141
+ LYS 140 K 0.40
142
+ LEU 141 L 0.97
143
+ GLU 142 E 0.25
144
+ TYR 143 Y 0.53
145
+ ASN 144 N 0.26
146
+ TYR 145 Y 0.67
147
+ ASN 146 N 0.65
148
+ SER 147 S 0.91
149
+ HIS 148 H 0.82
150
+ ASN 149 N 0.93
151
+ VAL 150 V 0.67
152
+ TYR 151 Y 0.87
153
+ ILE 152 I 0.02
154
+ MET 153 M 0.37
155
+ ALA 154 A 0.50
156
+ ASP 155 D 0.89
157
+ LYS 156 K 1.00
158
+ GLN 157 Q 0.96
159
+ LYS 158 K 0.83
160
+ ASN 159 N 0.95
161
+ GLY 160 G 0.02
162
+ ILE 161 I 0.57
163
+ LYS 162 K 0.82
164
+ VAL 163 V 0.66
165
+ ASN 164 N 0.32
166
+ PHE 165 F 0.50
167
+ LYS 166 K 0.11
168
+ ILE 167 I 0.49
169
+ ARG 168 R 0.20
170
+ HIS 169 H 0.82
171
+ ASN 170 N 0.34
172
+ ILE 171 I 0.91
173
+ GLU 172 E 0.28
174
+ ASP 173 D 0.02
175
+ GLY 174 G 0.09
176
+ SER 175 S 0.44
177
+ VAL 176 V 0.87
178
+ GLN 177 Q 0.65
179
+ LEU 178 L 0.88
180
+ ALA 179 A 0.89
181
+ ASP 180 D 0.53
182
+ HIS 181 H 0.89
183
+ TYR 182 Y 0.44
184
+ GLN 183 Q 0.02
185
+ GLN 184 Q 0.91
186
+ ASN 185 N 0.57
187
+ THR 186 T 0.00
188
+ PRO 187 P 0.97
189
+ ILE 188 I 0.17
190
+ GLY 189 G 0.57
191
+ ASP 190 D 0.46
192
+ GLY 191 G 0.08
193
+ PRO 192 P 0.85
194
+ VAL 193 V 0.09
195
+ LEU 194 L 0.79
196
+ LEU 195 L 0.61
197
+ PRO 196 P 0.72
198
+ ASP 197 D 0.29
199
+ ASN 198 N 0.95
200
+ HIS 199 H 0.78
201
+ TYR 200 Y 0.02
202
+ LEU 201 L 0.55
203
+ SER 202 S 0.63
204
+ THR 203 T 0.38
205
+ GLN 204 Q 0.18
206
+ SER 205 S 0.48
207
+ ASN 206 N 0.19
208
+ LEU 207 L 0.71
209
+ SER 208 S 0.56
210
+ LYS 209 K 0.56
211
+ ASP 210 D 0.98
212
+ PRO 211 P 0.43
213
+ ASN 212 N 0.91
214
+ GLU 213 E 0.76
215
+ LYS 214 K 0.58
216
+ ARG 215 R 0.42
217
+ ASP 216 D 0.81
218
+ HIS 217 H 0.96
219
+ MET 218 M 0.26
220
+ VAL 219 V 0.01
221
+ LEU 220 L 0.27
222
+ LEU 221 L 0.26
223
+ GLU 222 E 0.92
224
+ PHE 223 F 0.84
225
+ VAL 224 V 0.72
226
+ THR 225 T 1.00
227
+ ALA 226 A 0.55
228
+ ALA 227 A 0.72
229
+ GLY 228 G 0.44
230
+ ILE 229 I 0.01
231
+ THR 230 T 0.98
232
+ ALA 1054 A 0.83
233
+ SER 1055 S 0.78
234
+ THR 1056 T 0.55
235
+ LYS 1057 K 0.40
236
+ LYS 1058 K 0.06
237
+ LEU 1059 L 0.82
238
+ SER 1060 S 0.59
239
+ GLU 1061 E 0.68
240
+ SER 1062 S 0.28
241
+ LEU 1063 L 0.79
242
+ LYS 1064 K 0.94
243
+ ARG 1065 R 0.32
244
+ ILE 1066 I 0.28
245
+ GLY 1067 G 0.94
246
+ ASP 1068 D 0.19
247
+ GLU 1069 E 0.76
248
+ LEU 1070 L 0.19
249
+ ASP 1071 D 0.14
250
+ SER 1072 S 0.04
251
+ ASN 1073 N 0.39
252
+ MET 1074 M 0.50
253
+ GLU 1075 E 0.92
254
+ LEU 1076 L 0.81
255
+ GLN 1077 Q 0.04
256
+ ARG 1078 R 0.97
257
+ MET 1079 M 0.20
258
+ ILE 1080 I 0.90
259
+ ALA 1081 A 0.43
260
+ ALA 1082 A 0.93
261
+ VAL 1083 V 0.28
262
+ ASP 1084 D 0.29
263
+ THR 1085 T 0.83
264
+ ASP 1086 D 0.79
265
+ SER 1087 S 0.39
266
+ PRO 1088 P 0.85
267
+ ARG 1089 R 0.41
268
+ GLU 1090 E 0.08
269
+ VAL 1091 V 0.10
270
+ PHE 1092 F 0.15
271
+ PHE 1093 F 0.10
272
+ ARG 1094 R 0.59
273
+ VAL 1095 V 0.69
274
+ ALA 1096 A 0.50
275
+ ALA 1097 A 0.86
276
+ ASP 1098 D 0.77
277
+ MET 1099 M 0.60
278
+ PHE 1100 F 0.13
279
+ SER 1101 S 0.22
280
+ ASP 1102 D 0.29
281
+ GLY 1103 G 0.22
282
+ ASN 1104 N 0.01
283
+ PHE 1105 F 0.24
284
+ ASN 1106 N 0.48
285
+ TRP 1107 W 0.45
286
+ GLY 1108 G 0.52
287
+ ARG 1109 R 0.86
288
+ VAL 1110 V 0.68
289
+ VAL 1111 V 0.96
290
+ ALA 1112 A 0.01
291
+ LEU 1113 L 0.88
292
+ PHE 1114 F 0.66
293
+ TYR 1115 Y 0.11
294
+ PHE 1116 F 0.62
295
+ ALA 1117 A 0.62
296
+ SER 1118 S 0.26
297
+ LYS 1119 K 0.58
298
+ LEU 1120 L 0.18
299
+ VAL 1121 V 0.85
300
+ LEU 1122 L 0.27
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import gradio as gr
2
  import requests
3
- from Bio.PDB import PDBParser
 
 
 
4
  import numpy as np
5
  import os
6
  from gradio_molecule3d import Molecule3D
@@ -25,6 +28,8 @@ from datasets import Dataset
25
 
26
  from scipy.special import expit
27
 
 
 
28
  # Load model and move to device
29
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
30
  max_length = 1500
@@ -37,119 +42,250 @@ def normalize_scores(scores):
37
  min_score = np.min(scores)
38
  max_score = np.max(scores)
39
  return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
40
-
41
  def read_mol(pdb_path):
42
  """Read PDB file and return its content as a string"""
43
  with open(pdb_path, 'r') as f:
44
  return f.read()
45
 
46
- def fetch_pdb(pdb_id):
47
- pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
48
- pdb_path = f'{pdb_id}.pdb'
49
- response = requests.get(pdb_url)
50
- if response.status_code == 200:
51
- with open(pdb_path, 'wb') as f:
52
- f.write(response.content)
53
- return pdb_path
54
  else:
55
  return None
56
 
57
- def process_pdb(pdb_id, segment):
58
- pdb_path = fetch_pdb(pdb_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  if not pdb_path:
60
- return "Failed to fetch PDB file", None, None
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- parser = PDBParser(QUIET=1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  structure = parser.get_structure('protein', pdb_path)
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  try:
66
  chain = structure[0][segment]
67
  except KeyError:
68
  return "Invalid Chain ID", None, None
69
 
70
-
71
- aa_dict = {
72
- 'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
73
- 'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
74
- 'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
75
- 'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
76
- 'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
77
- }
78
-
79
- # Exclude non-amino acid residues
80
- sequence = "".join(
81
- aa_dict[residue.get_resname().strip()]
82
- for residue in chain
83
- if residue.get_resname().strip() in aa_dict
84
- )
85
- sequence2 = [
86
- (res.id[1], res) for res in chain
87
- if res.get_resname().strip() in aa_dict
88
- ]
89
 
90
  # Prepare input for model prediction
91
  input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
92
  with torch.no_grad():
93
  outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
94
-
95
  # Calculate scores and normalize them
96
  scores = expit(outputs[:, 1] - outputs[:, 0])
97
  normalized_scores = normalize_scores(scores)
98
-
99
- # Zip residues with scores to track the residue ID and score
100
- residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]
101
 
102
- result_str = "\n".join([
103
- f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
104
- for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
105
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- # Save the predictions to a file
108
  prediction_file = f"{pdb_id}_predictions.txt"
109
  with open(prediction_file, "w") as f:
110
  f.write(result_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- return result_str, molecule(pdb_path, residue_scores, segment), prediction_file
 
113
 
114
  def molecule(input_pdb, residue_scores=None, segment='A'):
115
  mol = read_mol(input_pdb) # Read PDB file content
116
-
117
  # Prepare high-scoring residues script if scores are provided
118
  high_score_script = ""
119
  if residue_scores is not None:
120
- # Sort residues based on their scores
121
  high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
122
  mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
123
 
124
  high_score_script = """
125
- // Reset all styles first
126
- viewer.getModel(0).setStyle({}, {});
127
-
128
- // Show only the selected chain
129
- viewer.getModel(0).setStyle(
130
  {"chain": "%s"},
131
- { cartoon: {colorscheme:"whiteCarbon"} }
132
  );
133
-
134
- // Highlight high-scoring residues only for the selected chain
135
- let highScoreResidues = [%s];
136
- viewer.getModel(0).setStyle(
137
- {"chain": "%s", "resi": highScoreResidues},
 
138
  {"stick": {"color": "red"}}
139
  );
140
 
141
- // Highlight medium-scoring residues only for the selected chain
142
- let midScoreResidues = [%s];
143
- viewer.getModel(0).setStyle(
144
- {"chain": "%s", "resi": midScoreResidues},
 
145
  {"stick": {"color": "orange"}}
146
  );
147
- """ % (segment,
148
- ", ".join(str(resi) for resi in high_score_residues),
149
- segment,
150
- ", ".join(str(resi) for resi in mid_score_residues),
151
- segment)
 
 
152
 
 
153
  html_content = f"""
154
  <!DOCTYPE html>
155
  <html>
@@ -173,13 +309,6 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
173
  let element = $("#container");
174
  let config = {{ backgroundColor: "white" }};
175
  let viewer = $3Dmol.createViewer(element, config);
176
- viewer.addModel(pdb, "pdb");
177
-
178
- // Reset all styles and show only selected chain
179
- viewer.getModel(0).setStyle(
180
- {{"chain": "{segment}"}},
181
- {{ cartoon: {{ colorscheme:"whiteCarbon" }} }}
182
- );
183
 
184
  {high_score_script}
185
 
@@ -221,39 +350,50 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
221
  # Return the HTML content within an iframe safely encoded for special characters
222
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
223
 
224
- reps = [
225
- {
226
- "model": 0,
227
- "style": "cartoon",
228
- "color": "whiteCarbon",
229
- "residue_range": "",
230
- "around": 0,
231
- "byres": False,
232
- }
233
- ]
234
 
235
  # Gradio UI
236
  with gr.Blocks() as demo:
237
  gr.Markdown("# Protein Binding Site Prediction")
 
238
  with gr.Row():
239
- pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
240
  visualize_btn = gr.Button("Visualize Structure")
241
 
242
- molecule_output2 = Molecule3D(label="Protein Structure", reps=reps)
 
 
 
 
 
 
 
 
 
243
 
244
  with gr.Row():
245
- #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
246
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
247
  prediction_btn = gr.Button("Predict Binding Site")
248
 
 
249
  molecule_output = gr.HTML(label="Protein Structure")
250
  predictions_output = gr.Textbox(label="Binding Site Predictions")
251
- download_output = gr.File(label="Download Predictions")
252
-
253
- visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)
254
-
255
- prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  gr.Markdown("## Examples")
258
  gr.Examples(
259
  examples=[
 
1
  import gradio as gr
2
  import requests
3
+ from Bio.PDB import PDBParser, MMCIFParser, PDBIO
4
+ from Bio.PDB.Polypeptide import is_aa
5
+ from Bio.SeqUtils import seq1
6
+ from typing import Optional, Tuple
7
  import numpy as np
8
  import os
9
  from gradio_molecule3d import Molecule3D
 
28
 
29
  from scipy.special import expit
30
 
31
+
32
+
33
  # Load model and move to device
34
  checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
35
  max_length = 1500
 
42
  min_score = np.min(scores)
43
  max_score = np.max(scores)
44
  return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
45
+
46
  def read_mol(pdb_path):
47
  """Read PDB file and return its content as a string"""
48
  with open(pdb_path, 'r') as f:
49
  return f.read()
50
 
51
+ def fetch_structure(pdb_id: str, output_dir: str = ".") -> Optional[str]:
52
+ """
53
+ Fetch the structure file for a given PDB ID. Prioritizes CIF files.
54
+ If a structure file already exists locally, it uses that.
55
+ """
56
+ file_path = download_structure(pdb_id, output_dir)
57
+ if file_path:
58
+ return file_path
59
  else:
60
  return None
61
 
62
+ def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:
63
+ """
64
+ Attempt to download the structure file in CIF or PDB format.
65
+ Returns the path to the downloaded file, or None if download fails.
66
+ """
67
+ for ext in ['.cif', '.pdb']:
68
+ file_path = os.path.join(output_dir, f"{pdb_id}{ext}")
69
+ if os.path.exists(file_path):
70
+ return file_path
71
+ url = f"https://files.rcsb.org/download/{pdb_id}{ext}"
72
+ try:
73
+ response = requests.get(url, timeout=10)
74
+ if response.status_code == 200:
75
+ with open(file_path, 'wb') as f:
76
+ f.write(response.content)
77
+ return file_path
78
+ except Exception as e:
79
+ print(f"Download error for {pdb_id}{ext}: {e}")
80
+ return None
81
+
82
+ def convert_cif_to_pdb(cif_path: str, output_dir: str = ".") -> str:
83
+ """
84
+ Convert a CIF file to PDB format using BioPython and return the PDB file path.
85
+ """
86
+ pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))
87
+ parser = MMCIFParser(QUIET=True)
88
+ structure = parser.get_structure('protein', cif_path)
89
+ io = PDBIO()
90
+ io.set_structure(structure)
91
+ io.save(pdb_path)
92
+ return pdb_path
93
+
94
+ def fetch_pdb(pdb_id):
95
+ pdb_path = fetch_structure(pdb_id)
96
  if not pdb_path:
97
+ return None
98
+ _, ext = os.path.splitext(pdb_path)
99
+ if ext == '.cif':
100
+ pdb_path = convert_cif_to_pdb(pdb_path)
101
+ return pdb_path
102
+
103
+ def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
104
+ """
105
+ Create a PDB file with only the specified chain and replace B-factor with prediction scores
106
+ """
107
+ # Read the original PDB file
108
+ parser = PDBParser(QUIET=True)
109
+ structure = parser.get_structure('protein', input_pdb)
110
 
111
+ # Prepare a new structure with only the specified chain
112
+ new_structure = structure.copy()
113
+ for model in new_structure:
114
+ # Remove all chains except the specified one
115
+ chains_to_remove = [chain for chain in model if chain.id != chain_id]
116
+ for chain in chains_to_remove:
117
+ model.detach_child(chain.id)
118
+
119
+ # Create a modified PDB with scores in B-factor
120
+ scores_dict = {resi: score for resi, score in residue_scores}
121
+ for model in new_structure:
122
+ for chain in model:
123
+ for residue in chain:
124
+ if residue.id[1] in scores_dict:
125
+ for atom in residue:
126
+ atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range
127
+
128
+ # Save the modified structure
129
+ output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
130
+ io = PDBIO()
131
+ io.set_structure(new_structure)
132
+ io.save(output_pdb)
133
+
134
+ return output_pdb
135
+
136
+ def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):
137
+ """
138
+ Calculate the geometric center of high-scoring residues
139
+ """
140
+ parser = PDBParser(QUIET=True)
141
  structure = parser.get_structure('protein', pdb_path)
142
 
143
+ # Collect coordinates of CA atoms from high-scoring residues
144
+ coords = []
145
+ for model in structure:
146
+ for chain in model:
147
+ if chain.id == chain_id:
148
+ for residue in chain:
149
+ if residue.id[1] in high_score_residues:
150
+ if 'CA' in residue: # Use alpha carbon as representative
151
+ ca_atom = residue['CA']
152
+ coords.append(ca_atom.coord)
153
+
154
+ # Calculate geometric center
155
+ if coords:
156
+ center = np.mean(coords, axis=0)
157
+ return center
158
+ return None
159
+
160
+
161
+
162
+ def process_pdb(pdb_id_or_file, segment):
163
+ # Determine if input is a PDB ID or file path
164
+ if pdb_id_or_file.endswith('.pdb'):
165
+ pdb_path = pdb_id_or_file
166
+ pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
167
+ else:
168
+ pdb_id = pdb_id_or_file
169
+ pdb_path = fetch_pdb(pdb_id)
170
+
171
+ if not pdb_path:
172
+ return "Failed to fetch PDB file", None, None
173
+
174
+ # Determine the file format and choose the appropriate parser
175
+ _, ext = os.path.splitext(pdb_path)
176
+ parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)
177
+
178
+ try:
179
+ # Parse the structure file
180
+ structure = parser.get_structure('protein', pdb_path)
181
+ except Exception as e:
182
+ return f"Error parsing structure file: {e}", None, None
183
+
184
+ # Extract the specified chain
185
  try:
186
  chain = structure[0][segment]
187
  except KeyError:
188
  return "Invalid Chain ID", None, None
189
 
190
+ protein_residues = [res for res in chain if is_aa(res)]
191
+ sequence = "".join(seq1(res.resname) for res in protein_residues)
192
+ sequence_id = [res.id[1] for res in protein_residues]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  # Prepare input for model prediction
195
  input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
196
  with torch.no_grad():
197
  outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
198
+
199
  # Calculate scores and normalize them
200
  scores = expit(outputs[:, 1] - outputs[:, 0])
201
  normalized_scores = normalize_scores(scores)
 
 
 
202
 
203
+ # Zip residues with scores to track the residue ID and score
204
+ residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]
205
+
206
+ # Identify high and mid scoring residues
207
+ high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
208
+ mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
209
+
210
+ # Calculate geometric center of high-scoring residues
211
+ geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
212
+ pymol_selection = f"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}"
213
+ pymol_center_cmd = f"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}" if geo_center is not None else ""
214
+
215
+ # Generate the result string
216
+ current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
217
+ result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
218
+ result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
219
+ result_str += "\n".join([
220
+ f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
221
+ for i, res in enumerate(protein_residues)])
222
 
223
+ # Create prediction and scored PDB files
224
  prediction_file = f"{pdb_id}_predictions.txt"
225
  with open(prediction_file, "w") as f:
226
  f.write(result_str)
227
+
228
+ # Create chain-specific PDB with scores in B-factor
229
+ scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)
230
+
231
+ # Molecule visualization with updated script
232
+ mol_vis = molecule(pdb_path, residue_scores, segment)
233
+
234
+ # Construct PyMOL command suggestions
235
+ pymol_commands = f"""
236
+ PyMOL Visualization Commands:
237
+ 1. Load PDB: load {os.path.abspath(pdb_path)}
238
+ 2. Select high-scoring residues: {pymol_selection}
239
+ 3. Highlight high-scoring residues: show sticks, high_score_residues
240
+ {pymol_center_cmd}
241
+ """
242
 
243
+ return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
244
+
245
 
246
  def molecule(input_pdb, residue_scores=None, segment='A'):
247
  mol = read_mol(input_pdb) # Read PDB file content
248
+
249
  # Prepare high-scoring residues script if scores are provided
250
  high_score_script = ""
251
  if residue_scores is not None:
252
+ # Filter residues based on their scores
253
  high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
254
  mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
255
 
256
  high_score_script = """
257
+ // Load the original model and apply white cartoon style
258
+ let chainModel = viewer.addModel(pdb, "pdb");
259
+ chainModel.setStyle({}, {});
260
+ chainModel.setStyle(
 
261
  {"chain": "%s"},
262
+ {"cartoon": {"color": "white"}}
263
  );
264
+
265
+ // Create a new model for high-scoring residues and apply red sticks style
266
+ let highScoreModel = viewer.addModel(pdb, "pdb");
267
+ highScoreModel.setStyle({}, {});
268
+ highScoreModel.setStyle(
269
+ {"chain": "%s", "resi": [%s]},
270
  {"stick": {"color": "red"}}
271
  );
272
 
273
+ // Create a new model for medium-scoring residues and apply orange sticks style
274
+ let midScoreModel = viewer.addModel(pdb, "pdb");
275
+ midScoreModel.setStyle({}, {});
276
+ midScoreModel.setStyle(
277
+ {"chain": "%s", "resi": [%s]},
278
  {"stick": {"color": "orange"}}
279
  );
280
+ """ % (
281
+ segment,
282
+ segment,
283
+ ", ".join(str(resi) for resi in high_score_residues),
284
+ segment,
285
+ ", ".join(str(resi) for resi in mid_score_residues)
286
+ )
287
 
288
+ # Generate the full HTML content
289
  html_content = f"""
290
  <!DOCTYPE html>
291
  <html>
 
309
  let element = $("#container");
310
  let config = {{ backgroundColor: "white" }};
311
  let viewer = $3Dmol.createViewer(element, config);
 
 
 
 
 
 
 
312
 
313
  {high_score_script}
314
 
 
350
  # Return the HTML content within an iframe safely encoded for special characters
351
  return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
352
 
 
 
 
 
 
 
 
 
 
 
353
 
354
  # Gradio UI
355
  with gr.Blocks() as demo:
356
  gr.Markdown("# Protein Binding Site Prediction")
357
+
358
  with gr.Row():
359
+ pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
360
  visualize_btn = gr.Button("Visualize Structure")
361
 
362
+ molecule_output2 = Molecule3D(label="Protein Structure", reps=[
363
+ {
364
+ "model": 0,
365
+ "style": "cartoon",
366
+ "color": "whiteCarbon",
367
+ "residue_range": "",
368
+ "around": 0,
369
+ "byres": False,
370
+ }
371
+ ])
372
 
373
  with gr.Row():
 
374
  segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
375
  prediction_btn = gr.Button("Predict Binding Site")
376
 
377
+
378
  molecule_output = gr.HTML(label="Protein Structure")
379
  predictions_output = gr.Textbox(label="Binding Site Predictions")
380
+ download_output = gr.File(label="Download Files", file_count="multiple")
 
 
 
 
381
 
382
+ prediction_btn.click(
383
+ process_pdb,
384
+ inputs=[
385
+ pdb_input,
386
+ segment_input
387
+ ],
388
+ outputs=[predictions_output, molecule_output, download_output]
389
+ )
390
+
391
+ visualize_btn.click(
392
+ fetch_pdb,
393
+ inputs=[pdb_input],
394
+ outputs=molecule_output2
395
+ )
396
+
397
  gr.Markdown("## Examples")
398
  gr.Examples(
399
  examples=[
test3.ipynb ADDED
@@ -0,0 +1,1599 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 18,
6
+ "id": "2b84eb4e-3f91-4a28-8e4f-322a34a9fb55",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "* Running on local URL: http://127.0.0.1:7877\n",
14
+ "* Running on public URL: https://a35567ec94eccaf8d1.gradio.live\n",
15
+ "\n",
16
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
17
+ ]
18
+ },
19
+ {
20
+ "data": {
21
+ "text/html": [
22
+ "<div><iframe src=\"https://a35567ec94eccaf8d1.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
23
+ ],
24
+ "text/plain": [
25
+ "<IPython.core.display.HTML object>"
26
+ ]
27
+ },
28
+ "metadata": {},
29
+ "output_type": "display_data"
30
+ },
31
+ {
32
+ "data": {
33
+ "text/plain": []
34
+ },
35
+ "execution_count": 18,
36
+ "metadata": {},
37
+ "output_type": "execute_result"
38
+ }
39
+ ],
40
+ "source": [
41
+ "from Bio.PDB import PDBParser, MMCIFParser, MMCIF2Dict, PDBIO\n",
42
+ "from Bio.PDB.Polypeptide import is_aa\n",
43
+ "from Bio.SeqUtils import seq1\n",
44
+ "import gradio as gr\n",
45
+ "import numpy as np\n",
46
+ "import os\n",
47
+ "import requests\n",
48
+ "from gradio_molecule3d import Molecule3D\n",
49
+ "from scipy.special import expit\n",
50
+ "from typing import Optional\n",
51
+ "\n",
52
+ "def normalize_scores(scores):\n",
53
+ " min_score = np.min(scores)\n",
54
+ " max_score = np.max(scores)\n",
55
+ " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
56
+ "\n",
57
+ "def read_mol(pdb_path):\n",
58
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
59
+ " with open(pdb_path, 'r') as f:\n",
60
+ " return f.read()\n",
61
+ "\n",
62
+ "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
63
+ " \"\"\"\n",
64
+ " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
65
+ " If a structure file already exists locally, it uses that.\n",
66
+ " \"\"\"\n",
67
+ " file_path = download_structure(pdb_id, output_dir)\n",
68
+ " if file_path:\n",
69
+ " return file_path\n",
70
+ " else:\n",
71
+ " return None\n",
72
+ "\n",
73
+ "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
74
+ " \"\"\"\n",
75
+ " Attempt to download the structure file in CIF or PDB format.\n",
76
+ " Returns the path to the downloaded file, or None if download fails.\n",
77
+ " \"\"\"\n",
78
+ " for ext in ['.cif', '.pdb']:\n",
79
+ " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
80
+ " if os.path.exists(file_path):\n",
81
+ " return file_path\n",
82
+ " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
83
+ " try:\n",
84
+ " response = requests.get(url, timeout=10)\n",
85
+ " if response.status_code == 200:\n",
86
+ " with open(file_path, 'wb') as f:\n",
87
+ " f.write(response.content)\n",
88
+ " return file_path\n",
89
+ " except Exception as e:\n",
90
+ " print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
91
+ " return None\n",
92
+ "\n",
93
+ "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
94
+ " \"\"\"\n",
95
+ " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
96
+ " \"\"\"\n",
97
+ " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
98
+ " parser = MMCIFParser(QUIET=True)\n",
99
+ " structure = parser.get_structure('protein', cif_path)\n",
100
+ " io = PDBIO()\n",
101
+ " io.set_structure(structure)\n",
102
+ " io.save(pdb_path)\n",
103
+ " return pdb_path\n",
104
+ "\n",
105
+ "def fetch_pdb(pdb_id):\n",
106
+ " pdb_path = fetch_structure(pdb_id)\n",
107
+ " if not pdb_path:\n",
108
+ " return None\n",
109
+ " _, ext = os.path.splitext(pdb_path)\n",
110
+ " if ext == '.cif':\n",
111
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
112
+ " return pdb_path\n",
113
+ "\n",
114
+ "def process_pdb(pdb_id, segment):\n",
115
+ " # Fetch the PDB or CIF file\n",
116
+ " pdb_path = fetch_pdb(pdb_id)\n",
117
+ " if not pdb_path:\n",
118
+ " return \"Failed to fetch PDB file\", None, None\n",
119
+ " \n",
120
+ " # Determine the file format and choose the appropriate parser\n",
121
+ " _, ext = os.path.splitext(pdb_path)\n",
122
+ " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
123
+ " \n",
124
+ " try:\n",
125
+ " # Parse the structure file\n",
126
+ " structure = parser.get_structure('protein', pdb_path)\n",
127
+ " except Exception as e:\n",
128
+ " return f\"Error parsing structure file: {e}\", None, None\n",
129
+ " \n",
130
+ " # Extract the specified chain\n",
131
+ " try:\n",
132
+ " chain = structure[0][segment]\n",
133
+ " except KeyError:\n",
134
+ " return \"Invalid Chain ID\", None, None\n",
135
+ " \n",
136
+ " protein_residues = [res for res in chain if is_aa(res)]\n",
137
+ " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
138
+ " sequence_id = [res.id[1] for res in protein_residues]\n",
139
+ " \n",
140
+ " # Generate random scores for residues\n",
141
+ " scores = np.random.rand(len(sequence))\n",
142
+ " normalized_scores = normalize_scores(scores)\n",
143
+ " \n",
144
+ " # Zip residues with scores to track the residue ID and score\n",
145
+ " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
146
+ "\n",
147
+ " # Generate the result string\n",
148
+ " result_str = \"\\n\".join([\n",
149
+ " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
150
+ " for i, res in enumerate(protein_residues)])\n",
151
+ " \n",
152
+ " # Save the predictions to a file\n",
153
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
154
+ " with open(prediction_file, \"w\") as f:\n",
155
+ " f.write(result_str)\n",
156
+ "\n",
157
+ " _, ext = os.path.splitext(pdb_path)\n",
158
+ " if ext == '.cif':\n",
159
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
160
+ "\n",
161
+ " return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
162
+ "\n",
163
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
164
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
165
+ " \n",
166
+ " # Prepare high-scoring residues script if scores are provided\n",
167
+ " high_score_script = \"\"\n",
168
+ " if residue_scores is not None:\n",
169
+ " # Partition residues into high (> 0.75) and mid (0.5 < score <= 0.75) score tiers\n",
170
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
171
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
172
+ " \n",
173
+ " high_score_script = \"\"\"\n",
174
+ " // Reset all styles first\n",
175
+ " viewer.getModel(0).setStyle({}, {});\n",
176
+ " \n",
177
+ " // Show only the selected chain\n",
178
+ " viewer.getModel(0).setStyle(\n",
179
+ " {\"chain\": \"%s\"}, \n",
180
+ " { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
181
+ " );\n",
182
+ " \n",
183
+ " // Highlight high-scoring residues only for the selected chain\n",
184
+ " let highScoreResidues = [%s];\n",
185
+ " viewer.getModel(0).setStyle(\n",
186
+ " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
187
+ " {\"stick\": {\"color\": \"red\"}}\n",
188
+ " );\n",
189
+ "\n",
190
+ " // Highlight medium-scoring residues only for the selected chain\n",
191
+ " let midScoreResidues = [%s];\n",
192
+ " viewer.getModel(0).setStyle(\n",
193
+ " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
194
+ " {\"stick\": {\"color\": \"orange\"}}\n",
195
+ " );\n",
196
+ " \"\"\" % (segment, \n",
197
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
198
+ " segment,\n",
199
+ " \", \".join(str(resi) for resi in mid_score_residues),\n",
200
+ " segment)\n",
201
+ " \n",
202
+ " html_content = f\"\"\"\n",
203
+ " <!DOCTYPE html>\n",
204
+ " <html>\n",
205
+ " <head> \n",
206
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
207
+ " <style>\n",
208
+ " .mol-container {{\n",
209
+ " width: 100%;\n",
210
+ " height: 700px;\n",
211
+ " position: relative;\n",
212
+ " }}\n",
213
+ " </style>\n",
214
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
215
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
216
+ " </head>\n",
217
+ " <body>\n",
218
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
219
+ " <script>\n",
220
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
221
+ " $(document).ready(function () {{\n",
222
+ " let element = $(\"#container\");\n",
223
+ " let config = {{ backgroundColor: \"white\" }};\n",
224
+ " let viewer = $3Dmol.createViewer(element, config);\n",
225
+ " viewer.addModel(pdb, \"pdb\");\n",
226
+ " \n",
227
+ " // Reset all styles and show only selected chain\n",
228
+ " viewer.getModel(0).setStyle(\n",
229
+ " {{\"chain\": \"{segment}\"}}, \n",
230
+ " {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
231
+ " );\n",
232
+ " \n",
233
+ " {high_score_script}\n",
234
+ " \n",
235
+ " // Add hover functionality\n",
236
+ " viewer.setHoverable(\n",
237
+ " {{}}, \n",
238
+ " true, \n",
239
+ " function(atom, viewer, event, container) {{\n",
240
+ " if (!atom.label) {{\n",
241
+ " atom.label = viewer.addLabel(\n",
242
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
243
+ " {{\n",
244
+ " position: atom, \n",
245
+ " backgroundColor: 'mintcream', \n",
246
+ " fontColor: 'black',\n",
247
+ " fontSize: 12,\n",
248
+ " padding: 2\n",
249
+ " }}\n",
250
+ " );\n",
251
+ " }}\n",
252
+ " }},\n",
253
+ " function(atom, viewer) {{\n",
254
+ " if (atom.label) {{\n",
255
+ " viewer.removeLabel(atom.label);\n",
256
+ " delete atom.label;\n",
257
+ " }}\n",
258
+ " }}\n",
259
+ " );\n",
260
+ " \n",
261
+ " viewer.zoomTo();\n",
262
+ " viewer.render();\n",
263
+ " viewer.zoom(0.8, 2000);\n",
264
+ " }});\n",
265
+ " </script>\n",
266
+ " </body>\n",
267
+ " </html>\n",
268
+ " \"\"\"\n",
269
+ " \n",
270
+ " # Return the HTML wrapped in an iframe; only double and single quotes are escaped for the srcdoc attribute\n",
271
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
272
+ "\n",
273
+ "reps = [\n",
274
+ " {\n",
275
+ " \"model\": 0,\n",
276
+ " \"style\": \"cartoon\",\n",
277
+ " \"color\": \"whiteCarbon\",\n",
278
+ " \"residue_range\": \"\",\n",
279
+ " \"around\": 0,\n",
280
+ " \"byres\": False,\n",
281
+ " }\n",
282
+ "]\n",
283
+ "\n",
284
+ "# Gradio UI\n",
285
+ "with gr.Blocks() as demo:\n",
286
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
287
+ " with gr.Row():\n",
288
+ " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
289
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
290
+ "\n",
291
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
292
+ "\n",
293
+ " with gr.Row():\n",
294
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
295
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
296
+ "\n",
297
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
298
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
299
+ " download_output = gr.File(label=\"Download Predictions\")\n",
300
+ " \n",
301
+ " visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
302
+ " \n",
303
+ " prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
304
+ " \n",
305
+ " gr.Markdown(\"## Examples\")\n",
306
+ " gr.Examples(\n",
307
+ " examples=[\n",
308
+ " [\"7RPZ\", \"A\"],\n",
309
+ " [\"2IWI\", \"B\"],\n",
310
+ " [\"2F6V\", \"A\"]\n",
311
+ " ],\n",
312
+ " inputs=[pdb_input, segment_input],\n",
313
+ " outputs=[predictions_output, molecule_output, download_output]\n",
314
+ " )\n",
315
+ "\n",
316
+ "demo.launch(share=True)"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": 20,
322
+ "id": "a2f1ca04-7a27-4e4f-b44d-39b20c5d034a",
323
+ "metadata": {},
324
+ "outputs": [
325
+ {
326
+ "name": "stdout",
327
+ "output_type": "stream",
328
+ "text": [
329
+ "* Running on local URL: http://127.0.0.1:7878\n",
330
+ "* Running on public URL: https://fbfb00e893a2d7c6ae.gradio.live\n",
331
+ "\n",
332
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
333
+ ]
334
+ },
335
+ {
336
+ "data": {
337
+ "text/html": [
338
+ "<div><iframe src=\"https://fbfb00e893a2d7c6ae.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
339
+ ],
340
+ "text/plain": [
341
+ "<IPython.core.display.HTML object>"
342
+ ]
343
+ },
344
+ "metadata": {},
345
+ "output_type": "display_data"
346
+ },
347
+ {
348
+ "data": {
349
+ "text/plain": []
350
+ },
351
+ "execution_count": 20,
352
+ "metadata": {},
353
+ "output_type": "execute_result"
354
+ }
355
+ ],
356
+ "source": [
357
+ "import os\n",
358
+ "from datetime import datetime\n",
359
+ "import gradio as gr\n",
360
+ "import numpy as np\n",
361
+ "import requests\n",
362
+ "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
363
+ "from Bio.PDB.Polypeptide import is_aa\n",
364
+ "from Bio.SeqUtils import seq1\n",
365
+ "from gradio_molecule3d import Molecule3D\n",
366
+ "from typing import Optional, Tuple\n",
367
+ "\n",
368
+ "def normalize_scores(scores):\n",
369
+ " min_score = np.min(scores)\n",
370
+ " max_score = np.max(scores)\n",
371
+ " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
372
+ "\n",
373
+ "def read_mol(pdb_path):\n",
374
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
375
+ " with open(pdb_path, 'r') as f:\n",
376
+ " return f.read()\n",
377
+ "\n",
378
+ "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
379
+ " \"\"\"\n",
380
+ " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
381
+ " If a structure file already exists locally, it uses that.\n",
382
+ " \"\"\"\n",
383
+ " file_path = download_structure(pdb_id, output_dir)\n",
384
+ " if file_path:\n",
385
+ " return file_path\n",
386
+ " else:\n",
387
+ " return None\n",
388
+ "\n",
389
+ "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
390
+ " \"\"\"\n",
391
+ " Attempt to download the structure file in CIF or PDB format.\n",
392
+ " Returns the path to the downloaded file, or None if download fails.\n",
393
+ " \"\"\"\n",
394
+ " for ext in ['.cif', '.pdb']:\n",
395
+ " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
396
+ " if os.path.exists(file_path):\n",
397
+ " return file_path\n",
398
+ " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
399
+ " try:\n",
400
+ " response = requests.get(url, timeout=10)\n",
401
+ " if response.status_code == 200:\n",
402
+ " with open(file_path, 'wb') as f:\n",
403
+ " f.write(response.content)\n",
404
+ " return file_path\n",
405
+ " except Exception as e:\n",
406
+ " print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
407
+ " return None\n",
408
+ "\n",
409
+ "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
410
+ " \"\"\"\n",
411
+ " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
412
+ " \"\"\"\n",
413
+ " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
414
+ " parser = MMCIFParser(QUIET=True)\n",
415
+ " structure = parser.get_structure('protein', cif_path)\n",
416
+ " io = PDBIO()\n",
417
+ " io.set_structure(structure)\n",
418
+ " io.save(pdb_path)\n",
419
+ " return pdb_path\n",
420
+ "\n",
421
+ "def fetch_pdb(pdb_id):\n",
422
+ " pdb_path = fetch_structure(pdb_id)\n",
423
+ " if not pdb_path:\n",
424
+ " return None\n",
425
+ " _, ext = os.path.splitext(pdb_path)\n",
426
+ " if ext == '.cif':\n",
427
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
428
+ " return pdb_path\n",
429
+ "\n",
430
+ "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
431
+ " \"\"\"\n",
432
+ " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
433
+ " \"\"\"\n",
434
+ " # Read the original PDB file\n",
435
+ " parser = PDBParser(QUIET=True)\n",
436
+ " structure = parser.get_structure('protein', input_pdb)\n",
437
+ " \n",
438
+ " # Prepare a new structure with only the specified chain\n",
439
+ " new_structure = structure.copy()\n",
440
+ " for model in new_structure:\n",
441
+ " # Remove all chains except the specified one\n",
442
+ " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
443
+ " for chain in chains_to_remove:\n",
444
+ " model.detach_child(chain.id)\n",
445
+ " \n",
446
+ " # Create a modified PDB with scores in B-factor\n",
447
+ " scores_dict = {resi: score for resi, score in residue_scores}\n",
448
+ " for model in new_structure:\n",
449
+ " for chain in model:\n",
450
+ " for residue in chain:\n",
451
+ " if residue.id[1] in scores_dict:\n",
452
+ " for atom in residue:\n",
453
+ " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n",
454
+ " \n",
455
+ " # Save the modified structure\n",
456
+ " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
457
+ " io = PDBIO()\n",
458
+ " io.set_structure(new_structure)\n",
459
+ " io.save(output_pdb)\n",
460
+ " \n",
461
+ " return output_pdb\n",
462
+ "\n",
463
+ "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
464
+ " \"\"\"\n",
465
+ " Calculate the geometric center of high-scoring residues\n",
466
+ " \"\"\"\n",
467
+ " parser = PDBParser(QUIET=True)\n",
468
+ " structure = parser.get_structure('protein', pdb_path)\n",
469
+ " \n",
470
+ " # Collect coordinates of CA atoms from high-scoring residues\n",
471
+ " coords = []\n",
472
+ " for model in structure:\n",
473
+ " for chain in model:\n",
474
+ " if chain.id == chain_id:\n",
475
+ " for residue in chain:\n",
476
+ " if residue.id[1] in high_score_residues:\n",
477
+ " if 'CA' in residue: # Use alpha carbon as representative\n",
478
+ " ca_atom = residue['CA']\n",
479
+ " coords.append(ca_atom.coord)\n",
480
+ " \n",
481
+ " # Calculate geometric center\n",
482
+ " if coords:\n",
483
+ " center = np.mean(coords, axis=0)\n",
484
+ " return center\n",
485
+ " return None\n",
486
+ "\n",
487
+ "def process_pdb(pdb_id_or_file, segment):\n",
488
+ " # Determine if input is a PDB ID or file path\n",
489
+ " if pdb_id_or_file.endswith('.pdb'):\n",
490
+ " pdb_path = pdb_id_or_file\n",
491
+ " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
492
+ " else:\n",
493
+ " pdb_id = pdb_id_or_file\n",
494
+ " pdb_path = fetch_pdb(pdb_id)\n",
495
+ " \n",
496
+ " if not pdb_path:\n",
497
+ " return \"Failed to fetch PDB file\", None, None\n",
498
+ " \n",
499
+ " # Determine the file format and choose the appropriate parser\n",
500
+ " _, ext = os.path.splitext(pdb_path)\n",
501
+ " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
502
+ " \n",
503
+ " try:\n",
504
+ " # Parse the structure file\n",
505
+ " structure = parser.get_structure('protein', pdb_path)\n",
506
+ " except Exception as e:\n",
507
+ " return f\"Error parsing structure file: {e}\", None, None\n",
508
+ " \n",
509
+ " # Extract the specified chain\n",
510
+ " try:\n",
511
+ " chain = structure[0][segment]\n",
512
+ " except KeyError:\n",
513
+ " return \"Invalid Chain ID\", None, None\n",
514
+ " \n",
515
+ " protein_residues = [res for res in chain if is_aa(res)]\n",
516
+ " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
517
+ " sequence_id = [res.id[1] for res in protein_residues]\n",
518
+ " \n",
519
+ " # Generate random scores for residues\n",
520
+ " scores = np.random.rand(len(sequence))\n",
521
+ " normalized_scores = normalize_scores(scores)\n",
522
+ " \n",
523
+ " # Zip residues with scores to track the residue ID and score\n",
524
+ " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
525
+ "\n",
526
+ " # Identify high and mid scoring residues\n",
527
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
528
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
529
+ "\n",
530
+ " # Calculate geometric center of high-scoring residues\n",
531
+ " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
532
+ " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
533
+ " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
534
+ "\n",
535
+ " # Generate the result string\n",
536
+ " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
537
+ " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
538
+ " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
539
+ " result_str += \"\\n\".join([\n",
540
+ " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
541
+ " for i, res in enumerate(protein_residues)])\n",
542
+ " \n",
543
+ " # Create prediction and scored PDB files\n",
544
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
545
+ " with open(prediction_file, \"w\") as f:\n",
546
+ " f.write(result_str)\n",
547
+ "\n",
548
+ " # Create chain-specific PDB with scores in B-factor\n",
549
+ " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
550
+ "\n",
551
+ " # Molecule visualization with updated script\n",
552
+ " mol_vis = molecule(pdb_path, residue_scores, segment)\n",
553
+ "\n",
554
+ " # Construct PyMOL command suggestions\n",
555
+ " pymol_commands = f\"\"\"\n",
556
+ "PyMOL Visualization Commands:\n",
557
+ "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
558
+ "2. Select high-scoring residues: {pymol_selection}\n",
559
+ "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
560
+ "{pymol_center_cmd}\n",
561
+ "\"\"\"\n",
562
+ " \n",
563
+ " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
564
+ "\n",
565
+ "# molecule() builds the 3Dmol.js viewer page; the chain cartoon is drawn\n",
566
+ "# semi-transparent so the stick highlights of scored residues stay visible\n",
567
+ "\n",
568
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
569
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
570
+ " \n",
571
+ " # Prepare high-scoring residues script if scores are provided\n",
572
+ " high_score_script = \"\"\n",
573
+ " if residue_scores is not None:\n",
574
+ " # Partition residues into high (> 0.75) and mid (0.5 < score <= 0.75) score tiers\n",
575
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
576
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
577
+ " \n",
578
+ " high_score_script = \"\"\"\n",
579
+ " // Reset all styles first\n",
580
+ " viewer.getModel(0).setStyle({}, {});\n",
581
+ " \n",
582
+ " // First, set background cartoon style for the entire chain (underneath)\n",
583
+ " viewer.getModel(0).setStyle(\n",
584
+ " {\"chain\": \"%s\"}, \n",
585
+ " { cartoon: {colorscheme:\"whiteCarbon\", opacity:0.7} }\n",
586
+ " );\n",
587
+ " \n",
588
+ " // Highlight high-scoring residues with sticks on top\n",
589
+ " let highScoreResidues = [%s];\n",
590
+ " viewer.getModel(0).setStyle(\n",
591
+ " {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
592
+ " {\"stick\": {\"color\": \"red\", \"opacity\": 1}}\n",
593
+ " );\n",
594
+ "\n",
595
+ " // Highlight medium-scoring residues\n",
596
+ " let midScoreResidues = [%s];\n",
597
+ " viewer.getModel(0).setStyle(\n",
598
+ " {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
599
+ " {\"stick\": {\"color\": \"orange\", \"opacity\": 0.8}}\n",
600
+ " );\n",
601
+ " \"\"\" % (segment, \n",
602
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
603
+ " segment,\n",
604
+ " \", \".join(str(resi) for resi in mid_score_residues),\n",
605
+ " segment)\n",
606
+ " \n",
607
+ " # Build the standalone HTML page that hosts the 3Dmol.js viewer\n",
608
+ " \n",
609
+ " html_content = f\"\"\"\n",
610
+ " <!DOCTYPE html>\n",
611
+ " <html>\n",
612
+ " <head> \n",
613
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
614
+ " <style>\n",
615
+ " .mol-container {{\n",
616
+ " width: 100%;\n",
617
+ " height: 700px;\n",
618
+ " position: relative;\n",
619
+ " }}\n",
620
+ " </style>\n",
621
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
622
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
623
+ " </head>\n",
624
+ " <body>\n",
625
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
626
+ " <script>\n",
627
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
628
+ " $(document).ready(function () {{\n",
629
+ " let element = $(\"#container\");\n",
630
+ " let config = {{ backgroundColor: \"white\" }};\n",
631
+ " let viewer = $3Dmol.createViewer(element, config);\n",
632
+ " viewer.addModel(pdb, \"pdb\");\n",
633
+ " \n",
634
+ " {high_score_script}\n",
635
+ " \n",
636
+ " // Add hover functionality (unchanged from before)\n",
637
+ " viewer.setHoverable(\n",
638
+ " {{}}, \n",
639
+ " true, \n",
640
+ " function(atom, viewer, event, container) {{\n",
641
+ " if (!atom.label) {{\n",
642
+ " atom.label = viewer.addLabel(\n",
643
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
644
+ " {{\n",
645
+ " position: atom, \n",
646
+ " backgroundColor: 'mintcream', \n",
647
+ " fontColor: 'black',\n",
648
+ " fontSize: 12,\n",
649
+ " padding: 2\n",
650
+ " }}\n",
651
+ " );\n",
652
+ " }}\n",
653
+ " }},\n",
654
+ " function(atom, viewer) {{\n",
655
+ " if (atom.label) {{\n",
656
+ " viewer.removeLabel(atom.label);\n",
657
+ " delete atom.label;\n",
658
+ " }}\n",
659
+ " }}\n",
660
+ " );\n",
661
+ " \n",
662
+ " viewer.zoomTo();\n",
663
+ " viewer.render();\n",
664
+ " viewer.zoom(0.8, 2000);\n",
665
+ " }});\n",
666
+ " </script>\n",
667
+ " </body>\n",
668
+ " </html>\n",
669
+ " \"\"\"\n",
670
+ " \n",
671
+ " # Return the HTML wrapped in an iframe; only double and single quotes are escaped for the srcdoc attribute\n",
672
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
673
+ "\n",
674
+ "# Gradio UI\n",
675
+ "with gr.Blocks() as demo:\n",
676
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
677
+ " \n",
678
+ " with gr.Row():\n",
679
+ " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
680
+ " file_input = gr.File(label=\"Or Upload PDB File\", file_types=['.pdb'], type=\"filepath\")\n",
681
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
682
+ "\n",
683
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
684
+ " {\n",
685
+ " \"model\": 0,\n",
686
+ " \"style\": \"cartoon\",\n",
687
+ " \"color\": \"whiteCarbon\",\n",
688
+ " \"residue_range\": \"\",\n",
689
+ " \"around\": 0,\n",
690
+ " \"byres\": False,\n",
691
+ " }\n",
692
+ " ])\n",
693
+ "\n",
694
+ " with gr.Row():\n",
695
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
696
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
697
+ "\n",
698
+ " def process_input(pdb_id, uploaded_file):\n",
699
+ " \"\"\"\n",
700
+ " Determine whether to use PDB ID or uploaded file\n",
701
+ " \"\"\"\n",
702
+ " if uploaded_file and uploaded_file.endswith('.pdb'):\n",
703
+ " return uploaded_file\n",
704
+ " return pdb_id\n",
705
+ "\n",
706
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
707
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
708
+ " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
709
+ " \n",
710
+ " prediction_btn.click(\n",
711
+ " process_pdb, \n",
712
+ " inputs=[\n",
713
+ " gr.State(lambda: process_input(pdb_input.value, file_input.value)), \n",
714
+ " segment_input\n",
715
+ " ], \n",
716
+ " outputs=[predictions_output, molecule_output, download_output]\n",
717
+ " )\n",
718
+ "\n",
719
+ " visualize_btn.click(\n",
720
+ " fetch_pdb, \n",
721
+ " inputs=[pdb_input], \n",
722
+ " outputs=molecule_output2\n",
723
+ " )\n",
724
+ "\n",
725
+ " gr.Markdown(\"## Examples\")\n",
726
+ " gr.Examples(\n",
727
+ " examples=[\n",
728
+ " [\"7RPZ\", \"A\"],\n",
729
+ " [\"2IWI\", \"B\"],\n",
730
+ " [\"2F6V\", \"A\"]\n",
731
+ " ],\n",
732
+ " inputs=[pdb_input, segment_input],\n",
733
+ " outputs=[predictions_output, molecule_output, download_output]\n",
734
+ " )\n",
735
+ "\n",
736
+ "demo.launch(share=True)"
737
+ ]
738
+ },
739
+ {
740
+ "cell_type": "code",
741
+ "execution_count": 32,
742
+ "id": "5b266025-7503-48f5-9371-3642d09f7e93",
743
+ "metadata": {},
744
+ "outputs": [
745
+ {
746
+ "name": "stdout",
747
+ "output_type": "stream",
748
+ "text": [
749
+ "* Running on local URL: http://127.0.0.1:7890\n",
750
+ "* Running on public URL: https://70a6e80d8deb42ddd0.gradio.live\n",
751
+ "\n",
752
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
753
+ ]
754
+ },
755
+ {
756
+ "data": {
757
+ "text/html": [
758
+ "<div><iframe src=\"https://70a6e80d8deb42ddd0.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
759
+ ],
760
+ "text/plain": [
761
+ "<IPython.core.display.HTML object>"
762
+ ]
763
+ },
764
+ "metadata": {},
765
+ "output_type": "display_data"
766
+ },
767
+ {
768
+ "data": {
769
+ "text/plain": []
770
+ },
771
+ "execution_count": 32,
772
+ "metadata": {},
773
+ "output_type": "execute_result"
774
+ }
775
+ ],
776
+ "source": [
777
+ "import os\n",
778
+ "from datetime import datetime\n",
779
+ "import gradio as gr\n",
780
+ "import numpy as np\n",
781
+ "import requests\n",
782
+ "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
783
+ "from Bio.PDB.Polypeptide import is_aa\n",
784
+ "from Bio.SeqUtils import seq1\n",
785
+ "from gradio_molecule3d import Molecule3D\n",
786
+ "from typing import Optional, Tuple\n",
787
+ "\n",
788
+ "def normalize_scores(scores):\n",
789
+ " min_score = np.min(scores)\n",
790
+ " max_score = np.max(scores)\n",
791
+ " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
792
+ "\n",
793
+ "def read_mol(pdb_path):\n",
794
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
795
+ " with open(pdb_path, 'r') as f:\n",
796
+ " return f.read()\n",
797
+ "\n",
798
+ "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
799
+ " \"\"\"\n",
800
+ " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
801
+ " If a structure file already exists locally, it uses that.\n",
802
+ " \"\"\"\n",
803
+ " file_path = download_structure(pdb_id, output_dir)\n",
804
+ " if file_path:\n",
805
+ " return file_path\n",
806
+ " else:\n",
807
+ " return None\n",
808
+ "\n",
809
+ "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
810
+ " \"\"\"\n",
811
+ " Attempt to download the structure file in CIF or PDB format.\n",
812
+ " Returns the path to the downloaded file, or None if download fails.\n",
813
+ " \"\"\"\n",
814
+ " for ext in ['.cif', '.pdb']:\n",
815
+ " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
816
+ " if os.path.exists(file_path):\n",
817
+ " return file_path\n",
818
+ " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
819
+ " try:\n",
820
+ " response = requests.get(url, timeout=10)\n",
821
+ " if response.status_code == 200:\n",
822
+ " with open(file_path, 'wb') as f:\n",
823
+ " f.write(response.content)\n",
824
+ " return file_path\n",
825
+ " except Exception as e:\n",
826
+ " print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
827
+ " return None\n",
828
+ "\n",
829
+ "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
830
+ " \"\"\"\n",
831
+ " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
832
+ " \"\"\"\n",
833
+ " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
834
+ " parser = MMCIFParser(QUIET=True)\n",
835
+ " structure = parser.get_structure('protein', cif_path)\n",
836
+ " io = PDBIO()\n",
837
+ " io.set_structure(structure)\n",
838
+ " io.save(pdb_path)\n",
839
+ " return pdb_path\n",
840
+ "\n",
841
+ "def fetch_pdb(pdb_id):\n",
842
+ " pdb_path = fetch_structure(pdb_id)\n",
843
+ " if not pdb_path:\n",
844
+ " return None\n",
845
+ " _, ext = os.path.splitext(pdb_path)\n",
846
+ " if ext == '.cif':\n",
847
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
848
+ " return pdb_path\n",
849
+ "\n",
850
+ "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
851
+ " \"\"\"\n",
852
+ " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
853
+ " \"\"\"\n",
854
+ " # Read the original PDB file\n",
855
+ " parser = PDBParser(QUIET=True)\n",
856
+ " structure = parser.get_structure('protein', input_pdb)\n",
857
+ " \n",
858
+ " # Prepare a new structure with only the specified chain\n",
859
+ " new_structure = structure.copy()\n",
860
+ " for model in new_structure:\n",
861
+ " # Remove all chains except the specified one\n",
862
+ " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
863
+ " for chain in chains_to_remove:\n",
864
+ " model.detach_child(chain.id)\n",
865
+ " \n",
866
+ " # Create a modified PDB with scores in B-factor\n",
867
+ " scores_dict = {resi: score for resi, score in residue_scores}\n",
868
+ " for model in new_structure:\n",
869
+ " for chain in model:\n",
870
+ " for residue in chain:\n",
871
+ " if residue.id[1] in scores_dict:\n",
872
+ " for atom in residue:\n",
873
+ " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n",
874
+ " \n",
875
+ " # Save the modified structure\n",
876
+ " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
877
+ " io = PDBIO()\n",
878
+ " io.set_structure(new_structure)\n",
879
+ " io.save(output_pdb)\n",
880
+ " \n",
881
+ " return output_pdb\n",
882
+ "\n",
883
+ "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
884
+ " \"\"\"\n",
885
+ " Calculate the geometric center of high-scoring residues\n",
886
+ " \"\"\"\n",
887
+ " parser = PDBParser(QUIET=True)\n",
888
+ " structure = parser.get_structure('protein', pdb_path)\n",
889
+ " \n",
890
+ " # Collect coordinates of CA atoms from high-scoring residues\n",
891
+ " coords = []\n",
892
+ " for model in structure:\n",
893
+ " for chain in model:\n",
894
+ " if chain.id == chain_id:\n",
895
+ " for residue in chain:\n",
896
+ " if residue.id[1] in high_score_residues:\n",
897
+ " if 'CA' in residue: # Use alpha carbon as representative\n",
898
+ " ca_atom = residue['CA']\n",
899
+ " coords.append(ca_atom.coord)\n",
900
+ " \n",
901
+ " # Calculate geometric center\n",
902
+ " if coords:\n",
903
+ " center = np.mean(coords, axis=0)\n",
904
+ " return center\n",
905
+ " return None\n",
906
+ "\n",
907
+ "def process_pdb(pdb_id_or_file, segment):\n",
908
+ " # Determine if input is a PDB ID or file path\n",
909
+ " if pdb_id_or_file.endswith('.pdb'):\n",
910
+ " pdb_path = pdb_id_or_file\n",
911
+ " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
912
+ " else:\n",
913
+ " pdb_id = pdb_id_or_file\n",
914
+ " pdb_path = fetch_pdb(pdb_id)\n",
915
+ " \n",
916
+ " if not pdb_path:\n",
917
+ " return \"Failed to fetch PDB file\", None, None\n",
918
+ " \n",
919
+ " # Determine the file format and choose the appropriate parser\n",
920
+ " _, ext = os.path.splitext(pdb_path)\n",
921
+ " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
922
+ " \n",
923
+ " try:\n",
924
+ " # Parse the structure file\n",
925
+ " structure = parser.get_structure('protein', pdb_path)\n",
926
+ " except Exception as e:\n",
927
+ " return f\"Error parsing structure file: {e}\", None, None\n",
928
+ " \n",
929
+ " # Extract the specified chain\n",
930
+ " try:\n",
931
+ " chain = structure[0][segment]\n",
932
+ " except KeyError:\n",
933
+ " return \"Invalid Chain ID\", None, None\n",
934
+ " \n",
935
+ " protein_residues = [res for res in chain if is_aa(res)]\n",
936
+ " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
937
+ " sequence_id = [res.id[1] for res in protein_residues]\n",
938
+ " \n",
939
+ " # Generate random scores for residues\n",
940
+ " scores = np.random.rand(len(sequence))\n",
941
+ " normalized_scores = normalize_scores(scores)\n",
942
+ " \n",
943
+ " # Zip residues with scores to track the residue ID and score\n",
944
+ " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
945
+ "\n",
946
+ " # Identify high and mid scoring residues\n",
947
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
948
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
949
+ "\n",
950
+ " # Calculate geometric center of high-scoring residues\n",
951
+ " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
952
+ " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
953
+ " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
954
+ "\n",
955
+ " # Generate the result string\n",
956
+ " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
957
+ " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
958
+ " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
959
+ " result_str += \"\\n\".join([\n",
960
+ " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
961
+ " for i, res in enumerate(protein_residues)])\n",
962
+ " \n",
963
+ " # Create prediction and scored PDB files\n",
964
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
965
+ " with open(prediction_file, \"w\") as f:\n",
966
+ " f.write(result_str)\n",
967
+ "\n",
968
+ " # Create chain-specific PDB with scores in B-factor\n",
969
+ " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
970
+ "\n",
971
+ " # Molecule visualization with updated script\n",
972
+ " mol_vis = molecule(pdb_path, residue_scores, segment)\n",
973
+ "\n",
974
+ " # Construct PyMOL command suggestions\n",
975
+ " pymol_commands = f\"\"\"\n",
976
+ "PyMOL Visualization Commands:\n",
977
+ "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
978
+ "2. Select high-scoring residues: {pymol_selection}\n",
979
+ "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
980
+ "{pymol_center_cmd}\n",
981
+ "\"\"\"\n",
982
+ " \n",
983
+ " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
984
+ "\n",
985
+ "# molecule() builds the 3Dmol.js viewer HTML. Its script adds the white cartoon model first\n",
985
+ "# and the red/orange stick models afterwards (presumably so the sticks are drawn over the cartoon).\n",
987
+ "\n",
988
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
989
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
990
+ "\n",
991
+ " # Prepare high-scoring residues script if scores are provided\n",
992
+ " high_score_script = \"\"\n",
993
+ " if residue_scores is not None:\n",
994
+ " # Filter residues based on their scores\n",
995
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
996
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
997
+ " \n",
998
+ " high_score_script = \"\"\"\n",
999
+ " // Load the original model and apply white cartoon style\n",
1000
+ " let chainModel = viewer.addModel(pdb, \"pdb\");\n",
1001
+ " chainModel.setStyle(\n",
1002
+ " {\"chain\": \"%s\"}, \n",
1003
+ " {\"cartoon\": {\"color\": \"white\"}}\n",
1004
+ " );\n",
1005
+ "\n",
1006
+ " // Create a new model for high-scoring residues and apply red sticks style\n",
1007
+ " let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1008
+ " highScoreModel.setStyle(\n",
1009
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1010
+ " {\"stick\": {\"color\": \"red\"}}\n",
1011
+ " );\n",
1012
+ "\n",
1013
+ " // Create a new model for medium-scoring residues and apply orange sticks style\n",
1014
+ " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1015
+ " midScoreModel.setStyle(\n",
1016
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1017
+ " {\"stick\": {\"color\": \"orange\"}}\n",
1018
+ " );\n",
1019
+ " \"\"\" % (\n",
1020
+ " segment,\n",
1021
+ " segment,\n",
1022
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
1023
+ " segment,\n",
1024
+ " \", \".join(str(resi) for resi in mid_score_residues)\n",
1025
+ " )\n",
1026
+ " \n",
1027
+ " # Generate the full HTML content\n",
1028
+ " html_content = f\"\"\"\n",
1029
+ " <!DOCTYPE html>\n",
1030
+ " <html>\n",
1031
+ " <head> \n",
1032
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
1033
+ " <style>\n",
1034
+ " .mol-container {{\n",
1035
+ " width: 100%;\n",
1036
+ " height: 700px;\n",
1037
+ " position: relative;\n",
1038
+ " }}\n",
1039
+ " </style>\n",
1040
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
1041
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
1042
+ " </head>\n",
1043
+ " <body>\n",
1044
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
1045
+ " <script>\n",
1046
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
1047
+ " $(document).ready(function () {{\n",
1048
+ " let element = $(\"#container\");\n",
1049
+ " let config = {{ backgroundColor: \"white\" }};\n",
1050
+ " let viewer = $3Dmol.createViewer(element, config);\n",
1051
+ " \n",
1052
+ " {high_score_script}\n",
1053
+ " \n",
1054
+ " // Add hover functionality\n",
1055
+ " viewer.setHoverable(\n",
1056
+ " {{}}, \n",
1057
+ " true, \n",
1058
+ " function(atom, viewer, event, container) {{\n",
1059
+ " if (!atom.label) {{\n",
1060
+ " atom.label = viewer.addLabel(\n",
1061
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1062
+ " {{\n",
1063
+ " position: atom, \n",
1064
+ " backgroundColor: 'mintcream', \n",
1065
+ " fontColor: 'black',\n",
1066
+ " fontSize: 12,\n",
1067
+ " padding: 2\n",
1068
+ " }}\n",
1069
+ " );\n",
1070
+ " }}\n",
1071
+ " }},\n",
1072
+ " function(atom, viewer) {{\n",
1073
+ " if (atom.label) {{\n",
1074
+ " viewer.removeLabel(atom.label);\n",
1075
+ " delete atom.label;\n",
1076
+ " }}\n",
1077
+ " }}\n",
1078
+ " );\n",
1079
+ " \n",
1080
+ " viewer.zoomTo();\n",
1081
+ " viewer.render();\n",
1082
+ " viewer.zoom(0.8, 2000);\n",
1083
+ " }});\n",
1084
+ " </script>\n",
1085
+ " </body>\n",
1086
+ " </html>\n",
1087
+ " \"\"\"\n",
1088
+ " \n",
1089
+ " # Return the HTML inside an iframe srcdoc attribute; only double and single quote characters are replaced with HTML entities, nothing else is escaped\n",
1090
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
1091
+ "\n",
1092
+ "\n",
1093
+ "# Gradio UI\n",
1094
+ "with gr.Blocks() as demo:\n",
1095
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1096
+ " \n",
1097
+ " with gr.Row():\n",
1098
+ " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1099
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
1100
+ "\n",
1101
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
1102
+ " {\n",
1103
+ " \"model\": 0,\n",
1104
+ " \"style\": \"cartoon\",\n",
1105
+ " \"color\": \"whiteCarbon\",\n",
1106
+ " \"residue_range\": \"\",\n",
1107
+ " \"around\": 0,\n",
1108
+ " \"byres\": False,\n",
1109
+ " }\n",
1110
+ " ])\n",
1111
+ "\n",
1112
+ " with gr.Row():\n",
1113
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1114
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1115
+ "\n",
1116
+ "\n",
1117
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1118
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
1119
+ " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
1120
+ " \n",
1121
+ " prediction_btn.click(\n",
1122
+ " process_pdb, \n",
1123
+ " inputs=[\n",
1124
+ " pdb_input, \n",
1125
+ " segment_input\n",
1126
+ " ], \n",
1127
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1128
+ " )\n",
1129
+ "\n",
1130
+ " visualize_btn.click(\n",
1131
+ " fetch_pdb, \n",
1132
+ " inputs=[pdb_input], \n",
1133
+ " outputs=molecule_output2\n",
1134
+ " )\n",
1135
+ "\n",
1136
+ " gr.Markdown(\"## Examples\")\n",
1137
+ " gr.Examples(\n",
1138
+ " examples=[\n",
1139
+ " [\"7RPZ\", \"A\"],\n",
1140
+ " [\"2IWI\", \"B\"],\n",
1141
+ " [\"2F6V\", \"A\"]\n",
1142
+ " ],\n",
1143
+ " inputs=[pdb_input, segment_input],\n",
1144
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1145
+ " )\n",
1146
+ "\n",
1147
+ "demo.launch(share=True)"
1148
+ ]
1149
+ },
1150
+ {
1151
+ "cell_type": "code",
1152
+ "execution_count": 39,
1153
+ "id": "514fad12-a31a-495f-af9e-04a18e11175e",
1154
+ "metadata": {},
1155
+ "outputs": [
1156
+ {
1157
+ "name": "stdout",
1158
+ "output_type": "stream",
1159
+ "text": [
1160
+ "* Running on local URL: http://127.0.0.1:7897\n",
1161
+ "* Running on public URL: https://0d9b5d36fa5302e0df.gradio.live\n",
1162
+ "\n",
1163
+ "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
1164
+ ]
1165
+ },
1166
+ {
1167
+ "data": {
1168
+ "text/html": [
1169
+ "<div><iframe src=\"https://0d9b5d36fa5302e0df.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
1170
+ ],
1171
+ "text/plain": [
1172
+ "<IPython.core.display.HTML object>"
1173
+ ]
1174
+ },
1175
+ "metadata": {},
1176
+ "output_type": "display_data"
1177
+ },
1178
+ {
1179
+ "data": {
1180
+ "text/plain": []
1181
+ },
1182
+ "execution_count": 39,
1183
+ "metadata": {},
1184
+ "output_type": "execute_result"
1185
+ }
1186
+ ],
1187
+ "source": [
1188
+ "import os\n",
1189
+ "from datetime import datetime\n",
1190
+ "import gradio as gr\n",
1191
+ "import numpy as np\n",
1192
+ "import requests\n",
1193
+ "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
1194
+ "from Bio.PDB.Polypeptide import is_aa\n",
1195
+ "from Bio.SeqUtils import seq1\n",
1196
+ "from gradio_molecule3d import Molecule3D\n",
1197
+ "from typing import Optional, Tuple\n",
1198
+ "\n",
1199
+ "def normalize_scores(scores):\n",
1200
+ " min_score = np.min(scores)\n",
1201
+ " max_score = np.max(scores)\n",
1202
+ " return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
1203
+ "\n",
1204
+ "def read_mol(pdb_path):\n",
1205
+ " \"\"\"Read PDB file and return its content as a string\"\"\"\n",
1206
+ " with open(pdb_path, 'r') as f:\n",
1207
+ " return f.read()\n",
1208
+ "\n",
1209
+ "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
1210
+ " \"\"\"\n",
1211
+ " Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
1212
+ " If a structure file already exists locally, it uses that.\n",
1213
+ " \"\"\"\n",
1214
+ " file_path = download_structure(pdb_id, output_dir)\n",
1215
+ " if file_path:\n",
1216
+ " return file_path\n",
1217
+ " else:\n",
1218
+ " return None\n",
1219
+ "\n",
1220
+ "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
1221
+ " \"\"\"\n",
1222
+ " Attempt to download the structure file in CIF or PDB format.\n",
1223
+ " Returns the path to the downloaded file, or None if download fails.\n",
1224
+ " \"\"\"\n",
1225
+ " for ext in ['.cif', '.pdb']:\n",
1226
+ " file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
1227
+ " if os.path.exists(file_path):\n",
1228
+ " return file_path\n",
1229
+ " url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
1230
+ " try:\n",
1231
+ " response = requests.get(url, timeout=10)\n",
1232
+ " if response.status_code == 200:\n",
1233
+ " with open(file_path, 'wb') as f:\n",
1234
+ " f.write(response.content)\n",
1235
+ " return file_path\n",
1236
+ " except Exception as e:\n",
1237
+ " print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
1238
+ " return None\n",
1239
+ "\n",
1240
+ "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
1241
+ " \"\"\"\n",
1242
+ " Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
1243
+ " \"\"\"\n",
1244
+ " pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
1245
+ " parser = MMCIFParser(QUIET=True)\n",
1246
+ " structure = parser.get_structure('protein', cif_path)\n",
1247
+ " io = PDBIO()\n",
1248
+ " io.set_structure(structure)\n",
1249
+ " io.save(pdb_path)\n",
1250
+ " return pdb_path\n",
1251
+ "\n",
1252
+ "def fetch_pdb(pdb_id):\n",
1253
+ " pdb_path = fetch_structure(pdb_id)\n",
1254
+ " if not pdb_path:\n",
1255
+ " return None\n",
1256
+ " _, ext = os.path.splitext(pdb_path)\n",
1257
+ " if ext == '.cif':\n",
1258
+ " pdb_path = convert_cif_to_pdb(pdb_path)\n",
1259
+ " return pdb_path\n",
1260
+ "\n",
1261
+ "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
1262
+ " \"\"\"\n",
1263
+ " Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
1264
+ " \"\"\"\n",
1265
+ " # Read the original PDB file\n",
1266
+ " parser = PDBParser(QUIET=True)\n",
1267
+ " structure = parser.get_structure('protein', input_pdb)\n",
1268
+ " \n",
1269
+ " # Prepare a new structure with only the specified chain\n",
1270
+ " new_structure = structure.copy()\n",
1271
+ " for model in new_structure:\n",
1272
+ " # Remove all chains except the specified one\n",
1273
+ " chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
1274
+ " for chain in chains_to_remove:\n",
1275
+ " model.detach_child(chain.id)\n",
1276
+ " \n",
1277
+ " # Create a modified PDB with scores in B-factor\n",
1278
+ " scores_dict = {resi: score for resi, score in residue_scores}\n",
1279
+ " for model in new_structure:\n",
1280
+ " for chain in model:\n",
1281
+ " for residue in chain:\n",
1282
+ " if residue.id[1] in scores_dict:\n",
1283
+ " for atom in residue:\n",
1284
+ " atom.bfactor = scores_dict[residue.id[1]] #* 100 # Scale score to B-factor range\n",
1285
+ " \n",
1286
+ " # Save the modified structure\n",
1287
+ " output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
1288
+ " io = PDBIO()\n",
1289
+ " io.set_structure(new_structure)\n",
1290
+ " io.save(output_pdb)\n",
1291
+ " \n",
1292
+ " return output_pdb\n",
1293
+ "\n",
1294
+ "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
1295
+ " \"\"\"\n",
1296
+ " Calculate the geometric center of high-scoring residues\n",
1297
+ " \"\"\"\n",
1298
+ " parser = PDBParser(QUIET=True)\n",
1299
+ " structure = parser.get_structure('protein', pdb_path)\n",
1300
+ " \n",
1301
+ " # Collect coordinates of CA atoms from high-scoring residues\n",
1302
+ " coords = []\n",
1303
+ " for model in structure:\n",
1304
+ " for chain in model:\n",
1305
+ " if chain.id == chain_id:\n",
1306
+ " for residue in chain:\n",
1307
+ " if residue.id[1] in high_score_residues:\n",
1308
+ " if 'CA' in residue: # Use alpha carbon as representative\n",
1309
+ " ca_atom = residue['CA']\n",
1310
+ " coords.append(ca_atom.coord)\n",
1311
+ " \n",
1312
+ " # Calculate geometric center\n",
1313
+ " if coords:\n",
1314
+ " center = np.mean(coords, axis=0)\n",
1315
+ " return center\n",
1316
+ " return None\n",
1317
+ "\n",
1318
+ "def process_pdb(pdb_id_or_file, segment):\n",
1319
+ " # Determine if input is a PDB ID or file path\n",
1320
+ " if pdb_id_or_file.endswith('.pdb'):\n",
1321
+ " pdb_path = pdb_id_or_file\n",
1322
+ " pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
1323
+ " else:\n",
1324
+ " pdb_id = pdb_id_or_file\n",
1325
+ " pdb_path = fetch_pdb(pdb_id)\n",
1326
+ " \n",
1327
+ " if not pdb_path:\n",
1328
+ " return \"Failed to fetch PDB file\", None, None\n",
1329
+ " \n",
1330
+ " # Determine the file format and choose the appropriate parser\n",
1331
+ " _, ext = os.path.splitext(pdb_path)\n",
1332
+ " parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
1333
+ " \n",
1334
+ " try:\n",
1335
+ " # Parse the structure file\n",
1336
+ " structure = parser.get_structure('protein', pdb_path)\n",
1337
+ " except Exception as e:\n",
1338
+ " return f\"Error parsing structure file: {e}\", None, None\n",
1339
+ " \n",
1340
+ " # Extract the specified chain\n",
1341
+ " try:\n",
1342
+ " chain = structure[0][segment]\n",
1343
+ " except KeyError:\n",
1344
+ " return \"Invalid Chain ID\", None, None\n",
1345
+ " \n",
1346
+ " protein_residues = [res for res in chain if is_aa(res)]\n",
1347
+ " sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
1348
+ " sequence_id = [res.id[1] for res in protein_residues]\n",
1349
+ " \n",
1350
+ " # Generate random scores for residues\n",
1351
+ " scores = np.random.rand(len(sequence))\n",
1352
+ " normalized_scores = normalize_scores(scores)\n",
1353
+ " \n",
1354
+ " # Zip residues with scores to track the residue ID and score\n",
1355
+ " residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
1356
+ "\n",
1357
+ " # Identify high and mid scoring residues\n",
1358
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1359
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1360
+ "\n",
1361
+ " # Calculate geometric center of high-scoring residues\n",
1362
+ " geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
1363
+ " pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
1364
+ " pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
1365
+ "\n",
1366
+ " # Generate the result string\n",
1367
+ " current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
1368
+ " result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
1369
+ " result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
1370
+ " result_str += \"\\n\".join([\n",
1371
+ " f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
1372
+ " for i, res in enumerate(protein_residues)])\n",
1373
+ " \n",
1374
+ " # Create prediction and scored PDB files\n",
1375
+ " prediction_file = f\"{pdb_id}_predictions.txt\"\n",
1376
+ " with open(prediction_file, \"w\") as f:\n",
1377
+ " f.write(result_str)\n",
1378
+ "\n",
1379
+ " # Create chain-specific PDB with scores in B-factor\n",
1380
+ " scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
1381
+ "\n",
1382
+ " # Molecule visualization with updated script\n",
1383
+ " mol_vis = molecule(pdb_path, residue_scores, segment)\n",
1384
+ "\n",
1385
+ " # Construct PyMOL command suggestions\n",
1386
+ " pymol_commands = f\"\"\"\n",
1387
+ "PyMOL Visualization Commands:\n",
1388
+ "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
1389
+ "2. Select high-scoring residues: {pymol_selection}\n",
1390
+ "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
1391
+ "{pymol_center_cmd}\n",
1392
+ "\"\"\"\n",
1393
+ " \n",
1394
+ " return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
1395
+ "\n",
1396
+ "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
1397
+ " mol = read_mol(input_pdb) # Read PDB file content\n",
1398
+ "\n",
1399
+ " # Prepare high-scoring residues script if scores are provided\n",
1400
+ " high_score_script = \"\"\n",
1401
+ " if residue_scores is not None:\n",
1402
+ " # Filter residues based on their scores\n",
1403
+ " high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
1404
+ " mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
1405
+ " \n",
1406
+ " high_score_script = \"\"\"\n",
1407
+ " // Load the original model and apply white cartoon style\n",
1408
+ " let chainModel = viewer.addModel(pdb, \"pdb\");\n",
1409
+ " chainModel.setStyle({}, {});\n",
1410
+ " chainModel.setStyle(\n",
1411
+ " {\"chain\": \"%s\"}, \n",
1412
+ " {\"cartoon\": {\"color\": \"white\"}}\n",
1413
+ " );\n",
1414
+ "\n",
1415
+ " // Create a new model for high-scoring residues and apply red sticks style\n",
1416
+ " let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1417
+ " highScoreModel.setStyle({}, {});\n",
1418
+ " highScoreModel.setStyle(\n",
1419
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1420
+ " {\"stick\": {\"color\": \"red\"}}\n",
1421
+ " );\n",
1422
+ "\n",
1423
+ " // Create a new model for medium-scoring residues and apply orange sticks style\n",
1424
+ " let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
1425
+ " midScoreModel.setStyle({}, {});\n",
1426
+ " midScoreModel.setStyle(\n",
1427
+ " {\"chain\": \"%s\", \"resi\": [%s]}, \n",
1428
+ " {\"stick\": {\"color\": \"orange\"}}\n",
1429
+ " );\n",
1430
+ " \"\"\" % (\n",
1431
+ " segment,\n",
1432
+ " segment,\n",
1433
+ " \", \".join(str(resi) for resi in high_score_residues),\n",
1434
+ " segment,\n",
1435
+ " \", \".join(str(resi) for resi in mid_score_residues)\n",
1436
+ " )\n",
1437
+ " \n",
1438
+ " # Generate the full HTML content\n",
1439
+ " html_content = f\"\"\"\n",
1440
+ " <!DOCTYPE html>\n",
1441
+ " <html>\n",
1442
+ " <head> \n",
1443
+ " <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
1444
+ " <style>\n",
1445
+ " .mol-container {{\n",
1446
+ " width: 100%;\n",
1447
+ " height: 700px;\n",
1448
+ " position: relative;\n",
1449
+ " }}\n",
1450
+ " </style>\n",
1451
+ " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
1452
+ " <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
1453
+ " </head>\n",
1454
+ " <body>\n",
1455
+ " <div id=\"container\" class=\"mol-container\"></div>\n",
1456
+ " <script>\n",
1457
+ " let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
1458
+ " $(document).ready(function () {{\n",
1459
+ " let element = $(\"#container\");\n",
1460
+ " let config = {{ backgroundColor: \"white\" }};\n",
1461
+ " let viewer = $3Dmol.createViewer(element, config);\n",
1462
+ " \n",
1463
+ " {high_score_script}\n",
1464
+ " \n",
1465
+ " // Add hover functionality\n",
1466
+ " viewer.setHoverable(\n",
1467
+ " {{}}, \n",
1468
+ " true, \n",
1469
+ " function(atom, viewer, event, container) {{\n",
1470
+ " if (!atom.label) {{\n",
1471
+ " atom.label = viewer.addLabel(\n",
1472
+ " atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
1473
+ " {{\n",
1474
+ " position: atom, \n",
1475
+ " backgroundColor: 'mintcream', \n",
1476
+ " fontColor: 'black',\n",
1477
+ " fontSize: 12,\n",
1478
+ " padding: 2\n",
1479
+ " }}\n",
1480
+ " );\n",
1481
+ " }}\n",
1482
+ " }},\n",
1483
+ " function(atom, viewer) {{\n",
1484
+ " if (atom.label) {{\n",
1485
+ " viewer.removeLabel(atom.label);\n",
1486
+ " delete atom.label;\n",
1487
+ " }}\n",
1488
+ " }}\n",
1489
+ " );\n",
1490
+ " \n",
1491
+ " viewer.zoomTo();\n",
1492
+ " viewer.render();\n",
1493
+ " viewer.zoom(0.8, 2000);\n",
1494
+ " }});\n",
1495
+ " </script>\n",
1496
+ " </body>\n",
1497
+ " </html>\n",
1498
+ " \"\"\"\n",
1499
+ " \n",
1500
+ " # Return the HTML inside an iframe srcdoc attribute; only double and single quote characters are replaced with HTML entities, nothing else is escaped\n",
1501
+ " return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
1502
+ "\n",
1503
+ "\n",
1504
+ "# Gradio UI\n",
1505
+ "with gr.Blocks() as demo:\n",
1506
+ " gr.Markdown(\"# Protein Binding Site Prediction\")\n",
1507
+ " \n",
1508
+ " with gr.Row():\n",
1509
+ " pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
1510
+ " visualize_btn = gr.Button(\"Visualize Structure\")\n",
1511
+ "\n",
1512
+ " molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
1513
+ " {\n",
1514
+ " \"model\": 0,\n",
1515
+ " \"style\": \"cartoon\",\n",
1516
+ " \"color\": \"whiteCarbon\",\n",
1517
+ " \"residue_range\": \"\",\n",
1518
+ " \"around\": 0,\n",
1519
+ " \"byres\": False,\n",
1520
+ " }\n",
1521
+ " ])\n",
1522
+ "\n",
1523
+ " with gr.Row():\n",
1524
+ " segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
1525
+ " prediction_btn = gr.Button(\"Predict Binding Site\")\n",
1526
+ "\n",
1527
+ "\n",
1528
+ " molecule_output = gr.HTML(label=\"Protein Structure\")\n",
1529
+ " predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
1530
+ " download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
1531
+ " \n",
1532
+ " prediction_btn.click(\n",
1533
+ " process_pdb, \n",
1534
+ " inputs=[\n",
1535
+ " pdb_input, \n",
1536
+ " segment_input\n",
1537
+ " ], \n",
1538
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1539
+ " )\n",
1540
+ "\n",
1541
+ " visualize_btn.click(\n",
1542
+ " fetch_pdb, \n",
1543
+ " inputs=[pdb_input], \n",
1544
+ " outputs=molecule_output2\n",
1545
+ " )\n",
1546
+ "\n",
1547
+ " gr.Markdown(\"## Examples\")\n",
1548
+ " gr.Examples(\n",
1549
+ " examples=[\n",
1550
+ " [\"7RPZ\", \"A\"],\n",
1551
+ " [\"2IWI\", \"B\"],\n",
1552
+ " [\"2F6V\", \"A\"]\n",
1553
+ " ],\n",
1554
+ " inputs=[pdb_input, segment_input],\n",
1555
+ " outputs=[predictions_output, molecule_output, download_output]\n",
1556
+ " )\n",
1557
+ "\n",
1558
+ "demo.launch(share=True)"
1559
+ ]
1560
+ },
1561
+ {
1562
+ "cell_type": "code",
1563
+ "execution_count": null,
1564
+ "id": "2f960cc2-8330-40f1-b54d-693ce922fa74",
1565
+ "metadata": {},
1566
+ "outputs": [],
1567
+ "source": []
1568
+ },
1569
+ {
1570
+ "cell_type": "code",
1571
+ "execution_count": null,
1572
+ "id": "cec41eef-c414-440f-a0ea-63fc8d3acf0b",
1573
+ "metadata": {},
1574
+ "outputs": [],
1575
+ "source": []
1576
+ }
1577
+ ],
1578
+ "metadata": {
1579
+ "kernelspec": {
1580
+ "display_name": "Python (LLM)",
1581
+ "language": "python",
1582
+ "name": "llm"
1583
+ },
1584
+ "language_info": {
1585
+ "codemirror_mode": {
1586
+ "name": "ipython",
1587
+ "version": 3
1588
+ },
1589
+ "file_extension": ".py",
1590
+ "mimetype": "text/x-python",
1591
+ "name": "python",
1592
+ "nbconvert_exporter": "python",
1593
+ "pygments_lexer": "ipython3",
1594
+ "version": "3.12.7"
1595
+ }
1596
+ },
1597
+ "nbformat": 4,
1598
+ "nbformat_minor": 5
1599
+ }