Spaces:

ThorbenF
/

test_webpage

Running

App Files Files Community

ThorbenFroehlking committed on Dec 11, 2024

Commit

3a463dd

1 Parent(s): 64f6421

Update

Browse files

Files changed (16) hide show

.gradio/certificate.pem +31 -0
.ipynb_checkpoints/2IWI-checkpoint.pdb +0 -0
.ipynb_checkpoints/4BDU-checkpoint.pdb +0 -0
.ipynb_checkpoints/4BDU_A_scored-checkpoint.pdb +0 -0
.ipynb_checkpoints/app-checkpoint.py +230 -90
.ipynb_checkpoints/test3-checkpoint.ipynb +1599 -0
2IWI.cif +0 -0
2IWI.pdb +0 -0
2IWI_predictions.txt +249 -244
4BDU.cif +0 -0
4BDU.pdb +0 -0
4BDU_A_scored.pdb +0 -0
4BDU_C_scored.pdb +0 -0
4BDU_predictions.txt +300 -0
app.py +230 -90
test3.ipynb +1599 -0

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

.ipynb_checkpoints/2IWI-checkpoint.pdb ADDED Viewed

The diff for this file is too large to render. See raw diff

.ipynb_checkpoints/4BDU-checkpoint.pdb ADDED Viewed

The diff for this file is too large to render. See raw diff

.ipynb_checkpoints/4BDU_A_scored-checkpoint.pdb ADDED Viewed

The diff for this file is too large to render. See raw diff

.ipynb_checkpoints/app-checkpoint.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import gradio as gr
 import requests
-from Bio.PDB import PDBParser
 import numpy as np
 import os
 from gradio_molecule3d import Molecule3D
@@ -25,6 +28,8 @@ from datasets import Dataset
 from scipy.special import expit
 # Load model and move to device
 checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
 max_length = 1500
@@ -37,119 +42,250 @@ def normalize_scores(scores):
     min_score = np.min(scores)
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
 def read_mol(pdb_path):
     """Read PDB file and return its content as a string"""
     with open(pdb_path, 'r') as f:
         return f.read()
-def fetch_pdb(pdb_id):
-    pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
-    pdb_path = f'{pdb_id}.pdb'
-    response = requests.get(pdb_url)
-    if response.status_code == 200:
-        with open(pdb_path, 'wb') as f:
-            f.write(response.content)
-        return pdb_path
     else:
         return None
-def process_pdb(pdb_id, segment):
-    pdb_path = fetch_pdb(pdb_id)
     if not pdb_path:
-        return "Failed to fetch PDB file", None, None
-    parser = PDBParser(QUIET=1)
     structure = parser.get_structure('protein', pdb_path)
     try:
         chain = structure[0][segment]
     except KeyError:
         return "Invalid Chain ID", None, None
-    aa_dict = {
-        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
-        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
-        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
-        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
-        'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
-    }
-    # Exclude non-amino acid residues
-    sequence = "".join(
-        aa_dict[residue.get_resname().strip()]
-        for residue in chain
-        if residue.get_resname().strip() in aa_dict
-    )
-    sequence2 = [
-        (res.id[1], res) for res in chain
-        if res.get_resname().strip() in aa_dict
-    ]
     # Prepare input for model prediction
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
     # Calculate scores and normalize them
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
-    # Zip residues with scores to track the residue ID and score
-    residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]
-    result_str = "\n".join([
-        f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
-        for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
-    ])
-    # Save the predictions to a file
     prediction_file = f"{pdb_id}_predictions.txt"
     with open(prediction_file, "w") as f:
         f.write(result_str)
-    return result_str, molecule(pdb_path, residue_scores, segment), prediction_file
 def molecule(input_pdb, residue_scores=None, segment='A'):
     mol = read_mol(input_pdb)  # Read PDB file content
     # Prepare high-scoring residues script if scores are provided
     high_score_script = ""
     if residue_scores is not None:
-        # Sort residues based on their scores
         high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
         mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
         high_score_script = """
-        // Reset all styles first
-        viewer.getModel(0).setStyle({}, {});
-        // Show only the selected chain
-        viewer.getModel(0).setStyle(
             {"chain": "%s"},
-            { cartoon: {colorscheme:"whiteCarbon"} }
         );
-        // Highlight high-scoring residues only for the selected chain
-        let highScoreResidues = [%s];
-        viewer.getModel(0).setStyle(
-            {"chain": "%s", "resi": highScoreResidues},
             {"stick": {"color": "red"}}
         );
-        // Highlight medium-scoring residues only for the selected chain
-        let midScoreResidues = [%s];
-        viewer.getModel(0).setStyle(
-            {"chain": "%s", "resi": midScoreResidues},
             {"stick": {"color": "orange"}}
         );
-        """ % (segment,
-               ", ".join(str(resi) for resi in high_score_residues),
-               segment,
-               ", ".join(str(resi) for resi in mid_score_residues),
-               segment)
     html_content = f"""
     <!DOCTYPE html>
     <html>
@@ -173,13 +309,6 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
                 let element = $("#container");
                 let config = {{ backgroundColor: "white" }};
                 let viewer = $3Dmol.createViewer(element, config);
-                viewer.addModel(pdb, "pdb");
-                // Reset all styles and show only selected chain
-                viewer.getModel(0).setStyle(
-                    {{"chain": "{segment}"}},
-                    {{ cartoon: {{ colorscheme:"whiteCarbon" }} }}
-                );
                 {high_score_script}
@@ -221,39 +350,50 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
     # Return the HTML content within an iframe safely encoded for special characters
     return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
-reps =    [
-        {
-          "model": 0,
-          "style": "cartoon",
-          "color": "whiteCarbon",
-          "residue_range": "",
-          "around": 0,
-          "byres": False,
-        }
-    ]
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")
     with gr.Row():
-        pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
         visualize_btn = gr.Button("Visualize Structure")
-    molecule_output2 = Molecule3D(label="Protein Structure", reps=reps)
     with gr.Row():
-        #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
         segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
         prediction_btn = gr.Button("Predict Binding Site")
     molecule_output = gr.HTML(label="Protein Structure")
     predictions_output = gr.Textbox(label="Binding Site Predictions")
-    download_output = gr.File(label="Download Predictions")
-    visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)
-    prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])
     gr.Markdown("## Examples")
     gr.Examples(
         examples=[

 import gradio as gr
 import requests
+from Bio.PDB import PDBParser, MMCIFParser, PDBIO
+from Bio.PDB.Polypeptide import is_aa
+from Bio.SeqUtils import seq1
+from typing import Optional, Tuple
 import numpy as np
 import os
 from gradio_molecule3d import Molecule3D
 from scipy.special import expit
 # Load model and move to device
 checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
 max_length = 1500
     min_score = np.min(scores)
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
 def read_mol(pdb_path):
     """Read PDB file and return its content as a string"""
     with open(pdb_path, 'r') as f:
         return f.read()
def fetch_structure(pdb_id: str, output_dir: str = ".") -> Optional[str]:
    """
    Return the path of a structure file for ``pdb_id``, or None if unavailable.

    Thin wrapper around ``download_structure``: whatever path it yields is
    passed through, and any falsy result is reported as None.
    """
    return download_structure(pdb_id, output_dir) or None
def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:
    """
    Locate or download the structure file for ``pdb_id`` (CIF preferred over PDB).

    A file already present in ``output_dir`` is reused without any network
    access; otherwise the file is requested from files.rcsb.org.

    Args:
        pdb_id: PDB identifier. Only ASCII letters, digits and underscores
            are accepted.
        output_dir: Directory used both as the cache and as the download target.

    Returns:
        Path to the local structure file, or None if the ID is rejected or
        no file could be found or downloaded.
    """
    # The ID is interpolated into a file path and a URL, so refuse anything
    # that could escape output_dir or alter the URL (e.g. "../x", "a/b").
    if not (
        isinstance(pdb_id, str)
        and pdb_id.isascii()
        and pdb_id.replace('_', '').isalnum()
    ):
        print(f"Rejected invalid PDB ID: {pdb_id!r}")
        return None

    extensions = ('.cif', '.pdb')
    candidates = [os.path.join(output_dir, f"{pdb_id}{ext}") for ext in extensions]

    # Look for a cached copy in every format before touching the network.
    for file_path in candidates:
        if os.path.exists(file_path):
            return file_path

    for ext, file_path in zip(extensions, candidates):
        url = f"https://files.rcsb.org/download/{pdb_id}{ext}"
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                with open(file_path, 'wb') as f:
                    f.write(response.content)
                return file_path
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and fall through to the next format.
            print(f"Download error for {pdb_id}{ext}: {e}")
    return None
def convert_cif_to_pdb(cif_path: str, output_dir: str = ".") -> str:
    """
    Convert a CIF file to PDB format using BioPython.

    Args:
        cif_path: Path to the input mmCIF file.
        output_dir: Directory in which the converted file is written.

    Returns:
        Path of the written PDB file: the input's base name with its final
        extension replaced by ``.pdb``, placed inside ``output_dir``.
    """
    # splitext swaps only the final extension; str.replace would also rewrite
    # any earlier ".cif" inside the file name and would leave a name without
    # ".cif" unchanged.
    stem = os.path.splitext(os.path.basename(cif_path))[0]
    pdb_path = os.path.join(output_dir, f"{stem}.pdb")

    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('protein', cif_path)

    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    return pdb_path
def fetch_pdb(pdb_id):
    """
    Return the path of a PDB-format file for ``pdb_id``, or None if fetching fails.

    When the fetched structure is a ``.cif`` file it is converted to PDB
    format and the converted file's path is returned instead.
    """
    structure_path = fetch_structure(pdb_id)
    if not structure_path:
        return None
    if os.path.splitext(structure_path)[1] == '.cif':
        return convert_cif_to_pdb(structure_path)
    return structure_path
def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
    """
    Write a copy of ``input_pdb`` that keeps only ``chain_id`` and stores the
    prediction score of each scored residue in the B-factor of its atoms.

    ``residue_scores`` is a list of ``(residue_number, score)`` pairs; scores
    are written unscaled. Returns the path of the file that was written,
    ``<input stem>_<chain_id>_scored.pdb``.
    """
    source_structure = PDBParser(QUIET=True).get_structure('protein', input_pdb)
    trimmed = source_structure.copy()

    # Drop every chain other than the requested one. The ids are collected
    # first so the model is not modified while it is being iterated.
    for model in trimmed:
        unwanted_ids = [chain.id for chain in model if chain.id != chain_id]
        for unwanted in unwanted_ids:
            model.detach_child(unwanted)

    # Overwrite the B-factor of every atom whose residue number has a score.
    score_by_resnum = dict(residue_scores)
    for model in trimmed:
        for chain in model:
            for residue in chain:
                resnum = residue.id[1]
                if resnum not in score_by_resnum:
                    continue
                for atom in residue:
                    atom.bfactor = score_by_resnum[resnum]

    stem = os.path.splitext(input_pdb)[0]
    output_pdb = f"{stem}_{chain_id}_scored.pdb"
    writer = PDBIO()
    writer.set_structure(trimmed)
    writer.save(output_pdb)
    return output_pdb
def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):
    """
    Calculate the geometric center of the high-scoring residues of one chain.

    Each residue is represented by its alpha-carbon (CA) coordinate; residues
    without a CA atom are ignored. Every model in the file is scanned.

    Args:
        pdb_path: Path to a PDB-format structure file.
        high_score_residues: Residue numbers to include.
        chain_id: ID of the chain to search.

    Returns:
        The mean of the collected CA coordinates (``np.mean(..., axis=0)``),
        or None when no matching CA atom is found.
    """
    # A set gives O(1) membership tests inside the nested loops below.
    wanted = set(high_score_residues)
    if not wanted:
        # Nothing to look for, so skip parsing the file altogether.
        return None

    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('protein', pdb_path)

    coords = []
    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            for residue in chain:
                if residue.id[1] in wanted and 'CA' in residue:
                    coords.append(residue['CA'].coord)

    if not coords:
        return None
    return np.mean(coords, axis=0)
def process_pdb(pdb_id_or_file, segment):
    """
    Predict per-residue binding-site scores for one chain and build all outputs.

    Args:
        pdb_id_or_file: Either a PDB ID to fetch, or a path ending in
            ``.pdb`` that is used directly.
        segment: ID of the chain to analyse.

    Returns:
        A 3-tuple ``(report_text, viewer_html, [prediction_file, scored_pdb])``.
        On failure the tuple is ``(error_message, None, None)``.
    """
    # Imported here so the timestamp below does not depend on a module-level
    # import that is not visible alongside this function.
    from datetime import datetime

    # Determine if input is a PDB ID or file path
    # NOTE(review): a value ending in '.pdb' is opened as a local path as-is;
    # confirm callers never pass untrusted text here.
    if pdb_id_or_file.endswith('.pdb'):
        pdb_path = pdb_id_or_file
        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
    else:
        pdb_id = pdb_id_or_file
        pdb_path = fetch_pdb(pdb_id)
    if not pdb_path:
        return "Failed to fetch PDB file", None, None

    # Determine the file format and choose the appropriate parser
    _, ext = os.path.splitext(pdb_path)
    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)
    try:
        structure = parser.get_structure('protein', pdb_path)
    except Exception as e:
        return f"Error parsing structure file: {e}", None, None

    # Extract the specified chain (first model only)
    try:
        chain = structure[0][segment]
    except KeyError:
        return "Invalid Chain ID", None, None

    protein_residues = [res for res in chain if is_aa(res)]
    if not protein_residues:
        # Without this guard an empty sequence would be sent to the model.
        return "No amino acid residues found in chain", None, None
    sequence = "".join(seq1(res.resname) for res in protein_residues)
    sequence_id = [res.id[1] for res in protein_residues]

    # Prepare input for model prediction
    input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
    with torch.no_grad():
        outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()

    # Calculate scores and normalize them
    scores = expit(outputs[:, 1] - outputs[:, 0])
    normalized_scores = normalize_scores(scores)

    # Pair each residue number with its score
    residue_scores = list(zip(sequence_id, normalized_scores))
    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]

    # Generate the result string
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
    result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
    # zip keeps the report aligned with residue_scores, which is built the same way.
    result_str += "\n".join(
        f"{res.resname} {res.id[1]} {aa} {score:.2f}"
        for res, aa, score in zip(protein_residues, sequence, normalized_scores)
    )

    # Create prediction and scored PDB files
    prediction_file = f"{pdb_id}_predictions.txt"
    with open(prediction_file, "w") as f:
        f.write(result_str)

    # Create chain-specific PDB with scores in B-factor
    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)

    # Molecule visualization
    mol_vis = molecule(pdb_path, residue_scores, segment)

    # Construct PyMOL command suggestions
    if high_score_residues:
        resi_expr = '+'.join(map(str, high_score_residues))
        geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
        pymol_selection = f"select high_score_residues, resi {resi_expr} and chain {segment}"
        pymol_center_cmd = (
            f"show spheres, resi {resi_expr} and chain {segment}"
            if geo_center is not None else ""
        )
        pymol_commands = f"""
PyMOL Visualization Commands:
1. Load PDB: load {os.path.abspath(pdb_path)}
2. Select high-scoring residues: {pymol_selection}
3. Highlight high-scoring residues: show sticks, high_score_residues
{pymol_center_cmd}
"""
    else:
        # An empty residue list would otherwise yield "resi  and chain X",
        # which is not a valid selection.
        pymol_commands = f"""
PyMOL Visualization Commands:
1. Load PDB: load {os.path.abspath(pdb_path)}
(no residues scored above 0.75)
"""

    return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
 def molecule(input_pdb, residue_scores=None, segment='A'):
     mol = read_mol(input_pdb)  # Read PDB file content
     # Prepare high-scoring residues script if scores are provided
     high_score_script = ""
     if residue_scores is not None:
+        # Filter residues based on their scores
         high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
         mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
         high_score_script = """
+        // Load the original model and apply white cartoon style
+        let chainModel = viewer.addModel(pdb, "pdb");
+        chainModel.setStyle({}, {});
+        chainModel.setStyle(
             {"chain": "%s"},
+            {"cartoon": {"color": "white"}}
         );
+        // Create a new model for high-scoring residues and apply red sticks style
+        let highScoreModel = viewer.addModel(pdb, "pdb");
+        highScoreModel.setStyle({}, {});
+        highScoreModel.setStyle(
+            {"chain": "%s", "resi": [%s]},
             {"stick": {"color": "red"}}
         );
+        // Create a new model for medium-scoring residues and apply orange sticks style
+        let midScoreModel = viewer.addModel(pdb, "pdb");
+        midScoreModel.setStyle({}, {});
+        midScoreModel.setStyle(
+            {"chain": "%s", "resi": [%s]},
             {"stick": {"color": "orange"}}
         );
+        """ % (
+            segment,
+            segment,
+            ", ".join(str(resi) for resi in high_score_residues),
+            segment,
+            ", ".join(str(resi) for resi in mid_score_residues)
+        )
+    # Generate the full HTML content
     html_content = f"""
     <!DOCTYPE html>
     <html>
                 let element = $("#container");
                 let config = {{ backgroundColor: "white" }};
                 let viewer = $3Dmol.createViewer(element, config);
                 {high_score_script}
     # Return the HTML content within an iframe safely encoded for special characters
     return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")
     with gr.Row():
+        pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
         visualize_btn = gr.Button("Visualize Structure")
+    molecule_output2 = Molecule3D(label="Protein Structure", reps=[
+        {
+            "model": 0,
+            "style": "cartoon",
+            "color": "whiteCarbon",
+            "residue_range": "",
+            "around": 0,
+            "byres": False,
+        }
+    ])
     with gr.Row():
         segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
         prediction_btn = gr.Button("Predict Binding Site")
     molecule_output = gr.HTML(label="Protein Structure")
     predictions_output = gr.Textbox(label="Binding Site Predictions")
+    download_output = gr.File(label="Download Files", file_count="multiple")
+    prediction_btn.click(
+        process_pdb,
+        inputs=[
+            pdb_input,
+            segment_input
+        ],
+        outputs=[predictions_output, molecule_output, download_output]
+    )
+    visualize_btn.click(
+        fetch_pdb,
+        inputs=[pdb_input],
+        outputs=molecule_output2
+    )
     gr.Markdown("## Examples")
     gr.Examples(
         examples=[

.ipynb_checkpoints/test3-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,1599 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "2b84eb4e-3f91-4a28-8e4f-322a34a9fb55",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7877\n",
+      "* Running on public URL: https://a35567ec94eccaf8d1.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://a35567ec94eccaf8d1.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from Bio.PDB import PDBParser, MMCIFParser, MMCIF2Dict, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import requests\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from scipy.special import expit\n",
+    "from typing import Optional\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def process_pdb(pdb_id, segment):\n",
+    "    # Fetch the PDB or CIF file\n",
+    "    pdb_path = fetch_pdb(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    result_str = \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Save the predictions to a file\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "\n",
+    "    return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "    \n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Sort residues based on their scores\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Reset all styles first\n",
+    "        viewer.getModel(0).setStyle({}, {});\n",
+    "        \n",
+    "        // Show only the selected chain\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
+    "        );\n",
+    "        \n",
+    "        // Highlight high-scoring residues only for the selected chain\n",
+    "        let highScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"red\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Highlight medium-scoring residues only for the selected chain\n",
+    "        let midScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"orange\"}}\n",
+    "        );\n",
+    "        \"\"\" % (segment, \n",
+    "               \", \".join(str(resi) for resi in high_score_residues),\n",
+    "               segment,\n",
+    "               \", \".join(str(resi) for resi in mid_score_residues),\n",
+    "               segment)\n",
+    "    \n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                viewer.addModel(pdb, \"pdb\");\n",
+    "                \n",
+    "                // Reset all styles and show only selected chain\n",
+    "                viewer.getModel(0).setStyle(\n",
+    "                    {{\"chain\": \"{segment}\"}}, \n",
+    "                    {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
+    "                );\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Embed the page in an iframe via srcdoc; only double and single quotes are entity-encoded\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "reps = [\n",
+    "    {\n",
+    "        \"model\": 0,\n",
+    "        \"style\": \"cartoon\",\n",
+    "        \"color\": \"whiteCarbon\",\n",
+    "        \"residue_range\": \"\",\n",
+    "        \"around\": 0,\n",
+    "        \"byres\": False,\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Predictions\")\n",
+    "    \n",
+    "    visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
+    "    \n",
+    "    prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
+    "    \n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "a2f1ca04-7a27-4e4f-b44d-39b20c5d034a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7878\n",
+      "* Running on public URL: https://fbfb00e893a2d7c6ae.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://fbfb00e893a2d7c6ae.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import requests\n",
+    "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from typing import Optional, Tuple\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
+    "    \"\"\"\n",
+    "    Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
+    "    \"\"\"\n",
+    "    # Read the original PDB file\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', input_pdb)\n",
+    "    \n",
+    "    # Prepare a new structure with only the specified chain\n",
+    "    new_structure = structure.copy()\n",
+    "    for model in new_structure:\n",
+    "        # Remove all chains except the specified one\n",
+    "        chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
+    "        for chain in chains_to_remove:\n",
+    "            model.detach_child(chain.id)\n",
+    "    \n",
+    "    # Create a modified PDB with scores in B-factor\n",
+    "    scores_dict = {resi: score for resi, score in residue_scores}\n",
+    "    for model in new_structure:\n",
+    "        for chain in model:\n",
+    "            for residue in chain:\n",
+    "                if residue.id[1] in scores_dict:\n",
+    "                    for atom in residue:\n",
+    "                        atom.bfactor = scores_dict[residue.id[1]]  # Stored unscaled; the former * 100 scaling is disabled\n",
+    "    \n",
+    "    # Save the modified structure\n",
+    "    output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(new_structure)\n",
+    "    io.save(output_pdb)\n",
+    "    \n",
+    "    return output_pdb\n",
+    "\n",
+    "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
+    "    \"\"\"\n",
+    "    Calculate the geometric center of high-scoring residues\n",
+    "    \"\"\"\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', pdb_path)\n",
+    "    \n",
+    "    # Collect coordinates of CA atoms from high-scoring residues\n",
+    "    coords = []\n",
+    "    for model in structure:\n",
+    "        for chain in model:\n",
+    "            if chain.id == chain_id:\n",
+    "                for residue in chain:\n",
+    "                    if residue.id[1] in high_score_residues:\n",
+    "                        if 'CA' in residue:  # Use alpha carbon as representative\n",
+    "                            ca_atom = residue['CA']\n",
+    "                            coords.append(ca_atom.coord)\n",
+    "    \n",
+    "    # Calculate geometric center\n",
+    "    if coords:\n",
+    "        center = np.mean(coords, axis=0)\n",
+    "        return center\n",
+    "    return None\n",
+    "\n",
+    "def process_pdb(pdb_id_or_file, segment):\n",
+    "    # Determine if input is a PDB ID or file path\n",
+    "    if pdb_id_or_file.endswith('.pdb'):\n",
+    "        pdb_path = pdb_id_or_file\n",
+    "        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
+    "    else:\n",
+    "        pdb_id = pdb_id_or_file\n",
+    "        pdb_path = fetch_pdb(pdb_id)\n",
+    "    \n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Identify high and mid scoring residues\n",
+    "    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "    mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "\n",
+    "    # Calculate geometric center of high-scoring residues\n",
+    "    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
+    "    pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
+    "    pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
+    "    result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
+    "    result_str += \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Create prediction and scored PDB files\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    # Create chain-specific PDB with scores in B-factor\n",
+    "    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
+    "\n",
+    "    # Molecule visualization with updated script\n",
+    "    mol_vis = molecule(pdb_path, residue_scores, segment)\n",
+    "\n",
+    "    # Construct PyMOL command suggestions\n",
+    "    pymol_commands = f\"\"\"\n",
+    "PyMOL Visualization Commands:\n",
+    "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
+    "2. Select high-scoring residues: {pymol_selection}\n",
+    "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
+    "{pymol_center_cmd}\n",
+    "\"\"\"\n",
+    "    \n",
+    "    return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
+    "\n",
+    "# molecule() renders the structure in a 3Dmol.js viewer: the selected chain is styled as a\n",
+    "# whiteCarbon cartoon, then high- and mid-scoring residues are restyled as red and orange sticks\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "    \n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Split residues into high (> 0.75) and mid (0.5 < score <= 0.75) score groups\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Reset all styles first\n",
+    "        viewer.getModel(0).setStyle({}, {});\n",
+    "        \n",
+    "        // First, set background cartoon style for the entire chain (underneath)\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            { cartoon: {colorscheme:\"whiteCarbon\", opacity:0.7} }\n",
+    "        );\n",
+    "        \n",
+    "        // Highlight high-scoring residues with sticks on top\n",
+    "        let highScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"red\", \"opacity\": 1}}\n",
+    "        );\n",
+    "\n",
+    "        // Highlight medium-scoring residues\n",
+    "        let midScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"orange\", \"opacity\": 0.8}}\n",
+    "        );\n",
+    "        \"\"\" % (segment, \n",
+    "               \", \".join(str(resi) for resi in high_score_residues),\n",
+    "               segment,\n",
+    "               \", \".join(str(resi) for resi in mid_score_residues),\n",
+    "               segment)\n",
+    "    \n",
+    "    # Build the standalone HTML page that loads 3Dmol.js and applies the styling script above\n",
+    "    \n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                viewer.addModel(pdb, \"pdb\");\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality (unchanged from before)\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Embed the page in an iframe via srcdoc; only double and single quotes are entity-encoded\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        file_input = gr.File(label=\"Or Upload PDB File\", file_types=['.pdb'], type=\"filepath\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
+    "        {\n",
+    "            \"model\": 0,\n",
+    "            \"style\": \"cartoon\",\n",
+    "            \"color\": \"whiteCarbon\",\n",
+    "            \"residue_range\": \"\",\n",
+    "            \"around\": 0,\n",
+    "            \"byres\": False,\n",
+    "        }\n",
+    "    ])\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "    def process_input(pdb_id, uploaded_file):\n",
+    "        \"\"\"\n",
+    "        Determine whether to use PDB ID or uploaded file\n",
+    "        \"\"\"\n",
+    "        if uploaded_file and uploaded_file.endswith('.pdb'):\n",
+    "            return uploaded_file\n",
+    "        return pdb_id\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
+    "    \n",
+    "    prediction_btn.click(\n",
+    "        process_pdb, \n",
+    "        inputs=[\n",
+    "            gr.State(lambda: process_input(pdb_input.value, file_input.value)), \n",
+    "            segment_input\n",
+    "        ], \n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "    visualize_btn.click(\n",
+    "        fetch_pdb, \n",
+    "        inputs=[pdb_input], \n",
+    "        outputs=molecule_output2\n",
+    "    )\n",
+    "\n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "5b266025-7503-48f5-9371-3642d09f7e93",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7890\n",
+      "* Running on public URL: https://70a6e80d8deb42ddd0.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://70a6e80d8deb42ddd0.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import requests\n",
+    "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from typing import Optional, Tuple\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
+    "    \"\"\"\n",
+    "    Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
+    "    \"\"\"\n",
+    "    # Read the original PDB file\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', input_pdb)\n",
+    "    \n",
+    "    # Prepare a new structure with only the specified chain\n",
+    "    new_structure = structure.copy()\n",
+    "    for model in new_structure:\n",
+    "        # Remove all chains except the specified one\n",
+    "        chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
+    "        for chain in chains_to_remove:\n",
+    "            model.detach_child(chain.id)\n",
+    "    \n",
+    "    # Create a modified PDB with scores in B-factor\n",
+    "    scores_dict = {resi: score for resi, score in residue_scores}\n",
+    "    for model in new_structure:\n",
+    "        for chain in model:\n",
+    "            for residue in chain:\n",
+    "                if residue.id[1] in scores_dict:\n",
+    "                    for atom in residue:\n",
+    "                        atom.bfactor = scores_dict[residue.id[1]]  # Stored unscaled; the former * 100 scaling is disabled\n",
+    "    \n",
+    "    # Save the modified structure\n",
+    "    output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(new_structure)\n",
+    "    io.save(output_pdb)\n",
+    "    \n",
+    "    return output_pdb\n",
+    "\n",
+    "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
+    "    \"\"\"\n",
+    "    Calculate the geometric center of high-scoring residues\n",
+    "    \"\"\"\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', pdb_path)\n",
+    "    \n",
+    "    # Collect coordinates of CA atoms from high-scoring residues\n",
+    "    coords = []\n",
+    "    for model in structure:\n",
+    "        for chain in model:\n",
+    "            if chain.id == chain_id:\n",
+    "                for residue in chain:\n",
+    "                    if residue.id[1] in high_score_residues:\n",
+    "                        if 'CA' in residue:  # Use alpha carbon as representative\n",
+    "                            ca_atom = residue['CA']\n",
+    "                            coords.append(ca_atom.coord)\n",
+    "    \n",
+    "    # Calculate geometric center\n",
+    "    if coords:\n",
+    "        center = np.mean(coords, axis=0)\n",
+    "        return center\n",
+    "    return None\n",
+    "\n",
+    "def process_pdb(pdb_id_or_file, segment):\n",
+    "    # Determine if input is a PDB ID or file path\n",
+    "    if pdb_id_or_file.endswith('.pdb'):\n",
+    "        pdb_path = pdb_id_or_file\n",
+    "        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
+    "    else:\n",
+    "        pdb_id = pdb_id_or_file\n",
+    "        pdb_path = fetch_pdb(pdb_id)\n",
+    "    \n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Identify high and mid scoring residues\n",
+    "    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "    mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "\n",
+    "    # Calculate geometric center of high-scoring residues\n",
+    "    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
+    "    pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
+    "    pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
+    "    result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
+    "    result_str += \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Create prediction and scored PDB files\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    # Create chain-specific PDB with scores in B-factor\n",
+    "    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
+    "\n",
+    "    # Molecule visualization with updated script\n",
+    "    mol_vis = molecule(pdb_path, residue_scores, segment)\n",
+    "\n",
+    "    # Construct PyMOL command suggestions\n",
+    "    pymol_commands = f\"\"\"\n",
+    "PyMOL Visualization Commands:\n",
+    "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
+    "2. Select high-scoring residues: {pymol_selection}\n",
+    "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
+    "{pymol_center_cmd}\n",
+    "\"\"\"\n",
+    "    \n",
+    "    return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
+    "\n",
+    "# molecule() function remains the same as in the previous script, \n",
+    "# but modify the visualization script to ensure cartoon is below stick representations\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "\n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Filter residues based on their scores\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Load the original model and apply white cartoon style\n",
+    "        let chainModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        chainModel.setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            {\"cartoon\": {\"color\": \"white\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for high-scoring residues and apply red sticks style\n",
+    "        let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        highScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"red\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for medium-scoring residues and apply orange sticks style\n",
+    "        let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        midScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"orange\"}}\n",
+    "        );\n",
+    "        \"\"\" % (\n",
+    "            segment,\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in high_score_residues),\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in mid_score_residues)\n",
+    "        )\n",
+    "    \n",
+    "    # Generate the full HTML content\n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Return the HTML content within an iframe safely encoded for special characters\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
+    "        {\n",
+    "            \"model\": 0,\n",
+    "            \"style\": \"cartoon\",\n",
+    "            \"color\": \"whiteCarbon\",\n",
+    "            \"residue_range\": \"\",\n",
+    "            \"around\": 0,\n",
+    "            \"byres\": False,\n",
+    "        }\n",
+    "    ])\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
+    "    \n",
+    "    prediction_btn.click(\n",
+    "        process_pdb, \n",
+    "        inputs=[\n",
+    "            pdb_input, \n",
+    "            segment_input\n",
+    "        ], \n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "    visualize_btn.click(\n",
+    "        fetch_pdb, \n",
+    "        inputs=[pdb_input], \n",
+    "        outputs=molecule_output2\n",
+    "    )\n",
+    "\n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "514fad12-a31a-495f-af9e-04a18e11175e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7896\n",
+      "* Running on public URL: https://387fb4706015321f92.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://387fb4706015321f92.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import requests\n",
+    "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from typing import Optional, Tuple\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
+    "    \"\"\"\n",
+    "    Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
+    "    \"\"\"\n",
+    "    # Read the original PDB file\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', input_pdb)\n",
+    "    \n",
+    "    # Prepare a new structure with only the specified chain\n",
+    "    new_structure = structure.copy()\n",
+    "    for model in new_structure:\n",
+    "        # Remove all chains except the specified one\n",
+    "        chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
+    "        for chain in chains_to_remove:\n",
+    "            model.detach_child(chain.id)\n",
+    "    \n",
+    "    # Create a modified PDB with scores in B-factor\n",
+    "    scores_dict = {resi: score for resi, score in residue_scores}\n",
+    "    for model in new_structure:\n",
+    "        for chain in model:\n",
+    "            for residue in chain:\n",
+    "                if residue.id[1] in scores_dict:\n",
+    "                    for atom in residue:\n",
+    "                        atom.bfactor = scores_dict[residue.id[1]] #* 100  # Scale score to B-factor range\n",
+    "    \n",
+    "    # Save the modified structure\n",
+    "    output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(new_structure)\n",
+    "    io.save(output_pdb)\n",
+    "    \n",
+    "    return output_pdb\n",
+    "\n",
+    "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
+    "    \"\"\"\n",
+    "    Calculate the geometric center of high-scoring residues\n",
+    "    \"\"\"\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', pdb_path)\n",
+    "    \n",
+    "    # Collect coordinates of CA atoms from high-scoring residues\n",
+    "    coords = []\n",
+    "    for model in structure:\n",
+    "        for chain in model:\n",
+    "            if chain.id == chain_id:\n",
+    "                for residue in chain:\n",
+    "                    if residue.id[1] in high_score_residues:\n",
+    "                        if 'CA' in residue:  # Use alpha carbon as representative\n",
+    "                            ca_atom = residue['CA']\n",
+    "                            coords.append(ca_atom.coord)\n",
+    "    \n",
+    "    # Calculate geometric center\n",
+    "    if coords:\n",
+    "        center = np.mean(coords, axis=0)\n",
+    "        return center\n",
+    "    return None\n",
+    "\n",
+    "def process_pdb(pdb_id_or_file, segment):\n",
+    "    # Determine if input is a PDB ID or file path\n",
+    "    if pdb_id_or_file.endswith('.pdb'):\n",
+    "        pdb_path = pdb_id_or_file\n",
+    "        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
+    "    else:\n",
+    "        pdb_id = pdb_id_or_file\n",
+    "        pdb_path = fetch_pdb(pdb_id)\n",
+    "    \n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Identify high and mid scoring residues\n",
+    "    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "    mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "\n",
+    "    # Calculate geometric center of high-scoring residues\n",
+    "    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
+    "    pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
+    "    pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
+    "    result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
+    "    result_str += \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Create prediction and scored PDB files\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    # Create chain-specific PDB with scores in B-factor\n",
+    "    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
+    "\n",
+    "    # Molecule visualization with updated script\n",
+    "    mol_vis = molecule(pdb_path, residue_scores, segment)\n",
+    "\n",
+    "    # Construct PyMOL command suggestions\n",
+    "    pymol_commands = f\"\"\"\n",
+    "PyMOL Visualization Commands:\n",
+    "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
+    "2. Select high-scoring residues: {pymol_selection}\n",
+    "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
+    "{pymol_center_cmd}\n",
+    "\"\"\"\n",
+    "    \n",
+    "    return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "\n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Filter residues based on their scores\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Load the original model and apply white cartoon style\n",
+    "        let chainModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        chainModel.setStyle({}, {});\n",
+    "        chainModel.setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            {\"cartoon\": {\"color\": \"white\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for high-scoring residues and apply red sticks style\n",
+    "        let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        highScoreModel.setStyle({}, {});\n",
+    "        highScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"red\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for medium-scoring residues and apply orange sticks style\n",
+    "        let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        midScoreModel.setStyle({}, {});\n",
+    "        midScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"orange\"}}\n",
+    "        );\n",
+    "        \"\"\" % (\n",
+    "            segment,\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in high_score_residues),\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in mid_score_residues)\n",
+    "        )\n",
+    "    \n",
+    "    # Generate the full HTML content\n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Return the HTML content within an iframe safely encoded for special characters\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
+    "        {\n",
+    "            \"model\": 0,\n",
+    "            \"style\": \"cartoon\",\n",
+    "            \"color\": \"whiteCarbon\",\n",
+    "            \"residue_range\": \"\",\n",
+    "            \"around\": 0,\n",
+    "            \"byres\": False,\n",
+    "        }\n",
+    "    ])\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
+    "    \n",
+    "    prediction_btn.click(\n",
+    "        process_pdb, \n",
+    "        inputs=[\n",
+    "            pdb_input, \n",
+    "            segment_input\n",
+    "        ], \n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "    visualize_btn.click(\n",
+    "        fetch_pdb, \n",
+    "        inputs=[pdb_input], \n",
+    "        outputs=molecule_output2\n",
+    "    )\n",
+    "\n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f960cc2-8330-40f1-b54d-693ce922fa74",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cec41eef-c414-440f-a0ea-63fc8d3acf0b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (LLM)",
+   "language": "python",
+   "name": "llm"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

2IWI.cif ADDED Viewed

The diff for this file is too large to render. See raw diff

2IWI.pdb CHANGED Viewed

The diff for this file is too large to render. See raw diff

2IWI_predictions.txt CHANGED Viewed

@@ -1,244 +1,249 @@
-Y 32 0.77
-R 33 0.71
-L 34 0.07
-G 35 0.04
-P 36 0.28
-L 37 0.39
-L 38 0.81
-G 39 0.67
-K 40 0.56
-G 41 0.16
-G 42 0.52
-F 43 0.59
-G 44 0.64
-T 45 0.62
-V 46 0.11
-F 47 0.07
-A 48 0.35
-G 49 0.94
-H 50 0.23
-R 51 0.78
-L 52 0.08
-T 53 0.65
-D 54 0.45
-R 55 0.12
-L 56 0.08
-Q 57 0.67
-V 58 0.44
-A 59 0.48
-I 60 0.31
-K 61 0.01
-V 62 0.57
-I 63 0.40
-P 64 0.43
-R 65 0.56
-N 66 0.36
-R 67 0.80
-V 68 0.68
-L 69 0.33
-V 78 0.63
-T 79 0.11
-C 80 0.56
-P 81 0.01
-L 82 0.93
-E 83 0.19
-V 84 0.26
-A 85 0.26
-L 86 0.68
-L 87 0.44
-W 88 0.91
-K 89 0.65
-V 90 0.38
-G 91 0.30
-A 92 0.18
-G 93 0.56
-G 94 0.90
-G 95 0.89
-H 96 0.36
-P 97 0.23
-G 98 0.04
-V 99 0.65
-I 100 0.83
-R 101 0.35
-L 102 0.47
-L 103 0.53
-D 104 0.26
-W 105 0.12
-F 106 0.14
-F 112 0.22
-M 113 0.04
-L 114 0.84
-V 115 0.64
-L 116 0.82
-E 117 0.42
-R 118 0.79
-P 119 0.18
-L 120 0.68
-P 121 0.35
-A 122 0.40
-Q 123 0.48
-D 124 0.02
-L 125 0.66
-F 126 0.51
-D 127 0.21
-Y 128 0.33
-I 129 0.37
-T 130 0.16
-E 131 0.35
-K 132 0.10
-G 133 0.39
-P 134 0.64
-L 135 0.56
-G 136 0.59
-E 137 0.85
-G 138 0.21
-P 139 0.27
-S 140 0.27
-R 141 0.12
-C 142 0.43
-F 143 0.32
-F 144 0.80
-G 145 0.49
-Q 146 0.81
-V 147 0.06
-V 148 0.40
-A 149 0.45
-A 150 0.77
-I 151 0.53
-Q 152 0.34
-H 153 0.33
-C 154 0.89
-H 155 0.51
-S 156 0.48
-R 157 0.21
-G 158 0.64
-V 159 0.00
-V 160 0.85
-H 161 0.55
-R 162 0.74
-D 163 0.41
-I 164 0.57
-K 165 0.63
-D 166 0.77
-E 167 0.06
-N 168 0.67
-I 169 0.65
-L 170 0.79
-I 171 0.42
-D 172 0.39
-L 173 0.49
-R 174 0.63
-R 175 0.39
-G 176 0.72
-C 177 0.88
-A 178 0.79
-K 179 0.76
-L 180 0.81
-I 181 0.30
-D 182 0.22
-F 183 0.31
-G 184 0.85
-S 185 0.67
-G 186 0.25
-A 187 0.50
-L 188 0.96
-L 189 0.26
-H 190 0.13
-D 191 0.29
-E 192 0.02
-P 193 0.65
-Y 194 0.32
-T 195 0.41
-D 196 0.82
-F 197 0.34
-D 198 0.15
-G 199 0.20
-T 200 0.46
-R 201 0.22
-V 202 0.26
-Y 203 0.29
-S 204 0.51
-P 205 0.70
-P 206 0.14
-E 207 0.89
-W 208 0.09
-I 209 0.54
-S 210 0.16
-R 211 0.69
-H 212 0.63
-Q 213 0.06
-Y 214 0.02
-H 215 0.17
-A 216 0.23
-L 217 0.65
-P 218 0.13
-A 219 0.91
-T 220 0.97
-V 221 0.77
-W 222 0.40
-S 223 0.91
-L 224 1.00
-G 225 0.97
-I 226 0.24
-L 227 0.44
-L 228 0.19
-Y 229 0.06
-D 230 0.32
-M 231 0.93
-V 232 0.35
-C 233 0.79
-G 234 0.50
-D 235 0.49
-I 236 0.10
-P 237 0.49
-F 238 0.12
-E 239 0.47
-R 240 0.40
-D 241 0.63
-Q 242 1.00
-E 243 0.55
-I 244 0.78
-L 245 0.92
-E 246 0.29
-A 247 0.37
-E 248 0.78
-L 249 0.54
-H 250 0.64
-F 251 0.30
-P 252 0.25
-A 253 0.01
-H 254 0.51
-V 255 0.81
-S 256 0.93
-P 257 0.79
-D 258 0.74
-C 259 0.31
-C 260 0.51
-A 261 0.91
-L 262 0.04
-I 263 0.21
-R 264 0.07
-R 265 0.34
-C 266 0.93
-L 267 0.80
-A 268 0.75
-P 269 0.59
-K 270 0.62
-P 271 0.18
-S 272 0.32
-S 273 0.54
-R 274 0.14
-P 275 0.38
-S 276 0.29
-L 277 0.97
-E 278 0.72
-E 279 0.05
-I 280 0.26
-L 281 0.24
-L 282 0.42
-D 283 0.84
-P 284 0.36
-W 285 0.79
-M 286 0.05
-Q 287 0.81
-T 288 0.39

+GLY 22 G 0.18
+LYS 23 K 0.51
+ASP 24 D 0.12
+ARG 25 R 0.25
+GLU 26 E 0.08
+ALA 27 A 0.82
+PHE 28 F 0.65
+GLU 29 E 0.65
+ALA 30 A 0.22
+GLU 31 E 0.49
+TYR 32 Y 0.57
+ARG 33 R 0.56
+LEU 34 L 0.83
+GLY 35 G 0.42
+PRO 36 P 0.97
+LEU 37 L 0.65
+LEU 38 L 0.08
+GLY 39 G 0.05
+LYS 40 K 0.55
+GLY 41 G 0.38
+GLY 42 G 0.45
+PHE 43 F 0.92
+GLY 44 G 0.00
+THR 45 T 0.76
+VAL 46 V 0.63
+PHE 47 F 0.97
+ALA 48 A 0.57
+GLY 49 G 0.94
+HIS 50 H 0.40
+ARG 51 R 0.27
+LEU 52 L 0.65
+THR 53 T 0.84
+ASP 54 D 0.85
+ARG 55 R 0.46
+LEU 56 L 0.87
+GLN 57 Q 0.76
+VAL 58 V 0.22
+ALA 59 A 0.65
+ILE 60 I 0.87
+LYS 61 K 0.69
+VAL 62 V 0.76
+ILE 63 I 0.70
+PRO 64 P 0.04
+ARG 65 R 0.20
+THR 79 T 0.80
+CYS 80 C 0.82
+PRO 81 P 0.72
+LEU 82 L 0.17
+GLU 83 E 0.70
+VAL 84 V 0.21
+ALA 85 A 0.15
+LEU 86 L 0.28
+LEU 87 L 0.03
+TRP 88 W 0.18
+LYS 89 K 0.01
+VAL 90 V 0.43
+GLY 91 G 0.25
+ALA 92 A 0.65
+GLY 93 G 0.00
+GLY 94 G 0.52
+GLY 95 G 0.22
+HIS 96 H 0.03
+PRO 97 P 0.57
+GLY 98 G 0.32
+VAL 99 V 0.89
+ILE 100 I 0.14
+ARG 101 R 0.66
+LEU 102 L 0.18
+LEU 103 L 0.30
+ASP 104 D 0.36
+TRP 105 W 0.83
+PHE 106 F 0.77
+GLU 107 E 0.95
+PHE 112 F 0.04
+MET 113 M 0.05
+LEU 114 L 0.32
+VAL 115 V 1.00
+LEU 116 L 0.43
+GLU 117 E 0.76
+ARG 118 R 0.65
+PRO 119 P 0.28
+LEU 120 L 0.74
+PRO 121 P 0.69
+ALA 122 A 0.89
+GLN 123 Q 0.68
+ASP 124 D 0.67
+LEU 125 L 0.89
+PHE 126 F 0.33
+ASP 127 D 0.05
+TYR 128 Y 0.59
+ILE 129 I 0.19
+THR 130 T 0.88
+GLU 131 E 0.24
+LYS 132 K 0.04
+GLY 133 G 0.99
+PRO 134 P 0.43
+LEU 135 L 0.31
+GLY 136 G 0.83
+GLU 137 E 0.12
+GLY 138 G 0.02
+PRO 139 P 0.71
+SER 140 S 0.70
+ARG 141 R 0.63
+CYS 142 C 0.70
+PHE 143 F 0.92
+PHE 144 F 0.02
+GLY 145 G 0.72
+GLN 146 Q 0.03
+VAL 147 V 0.70
+VAL 148 V 0.34
+ALA 149 A 0.95
+ALA 150 A 0.39
+ILE 151 I 0.21
+GLN 152 Q 0.86
+HIS 153 H 0.11
+CYS 154 C 0.30
+HIS 155 H 0.12
+SER 156 S 0.55
+ARG 157 R 0.20
+GLY 158 G 0.32
+VAL 159 V 0.80
+VAL 160 V 0.43
+HIS 161 H 0.99
+ARG 162 R 0.13
+ASP 163 D 0.73
+ILE 164 I 0.70
+LYS 165 K 0.88
+ASP 166 D 0.56
+GLU 167 E 0.61
+ASN 168 N 0.01
+ILE 169 I 0.48
+LEU 170 L 0.18
+ILE 171 I 0.28
+ASP 172 D 0.79
+LEU 173 L 0.33
+ARG 174 R 0.31
+ARG 175 R 0.39
+GLY 176 G 0.19
+CYS 177 C 0.57
+ALA 178 A 0.99
+LYS 179 K 0.47
+LEU 180 L 0.02
+ILE 181 I 0.81
+ASP 182 D 0.59
+PHE 183 F 0.74
+GLY 184 G 0.43
+SER 185 S 0.90
+GLY 186 G 0.87
+ALA 187 A 0.39
+LEU 188 L 0.43
+LEU 189 L 0.84
+HIS 190 H 0.91
+ASP 191 D 0.45
+GLU 192 E 0.00
+PRO 193 P 0.86
+TYR 194 Y 0.11
+THR 195 T 0.54
+ASP 196 D 0.70
+PHE 197 F 0.62
+ASP 198 D 0.31
+GLY 199 G 0.41
+THR 200 T 0.85
+ARG 201 R 0.18
+VAL 202 V 0.10
+TYR 203 Y 0.22
+SER 204 S 0.31
+PRO 205 P 0.41
+PRO 206 P 0.87
+GLU 207 E 0.77
+TRP 208 W 0.51
+ILE 209 I 0.18
+SER 210 S 0.03
+ARG 211 R 0.41
+HIS 212 H 0.83
+GLN 213 Q 0.30
+TYR 214 Y 0.38
+HIS 215 H 0.28
+ALA 216 A 0.51
+LEU 217 L 0.61
+PRO 218 P 0.77
+ALA 219 A 0.79
+THR 220 T 0.32
+VAL 221 V 0.35
+TRP 222 W 0.44
+SER 223 S 0.35
+LEU 224 L 0.67
+GLY 225 G 0.21
+ILE 226 I 0.88
+LEU 227 L 0.38
+LEU 228 L 0.27
+TYR 229 Y 0.53
+ASP 230 D 0.36
+MET 231 M 0.76
+VAL 232 V 0.59
+CYS 233 C 0.44
+GLY 234 G 0.88
+ASP 235 D 0.54
+ILE 236 I 0.63
+PRO 237 P 0.41
+PHE 238 F 0.84
+GLU 239 E 0.66
+ARG 240 R 0.20
+ASP 241 D 0.08
+GLN 242 Q 0.23
+GLU 243 E 0.31
+ILE 244 I 0.17
+LEU 245 L 0.58
+GLU 246 E 0.76
+ALA 247 A 0.82
+GLU 248 E 0.39
+LEU 249 L 0.53
+HIS 250 H 0.67
+PHE 251 F 0.36
+PRO 252 P 0.16
+ALA 253 A 0.08
+HIS 254 H 0.53
+VAL 255 V 0.39
+SER 256 S 0.24
+PRO 257 P 0.06
+ASP 258 D 0.79
+CYS 259 C 0.54
+CYS 260 C 0.46
+ALA 261 A 0.29
+LEU 262 L 0.60
+ILE 263 I 0.33
+ARG 264 R 0.56
+ARG 265 R 0.95
+CYS 266 C 0.63
+LEU 267 L 0.83
+ALA 268 A 0.22
+PRO 269 P 0.18
+LYS 270 K 0.71
+PRO 271 P 0.91
+SER 272 S 0.84
+SER 273 S 0.62
+ARG 274 R 0.22
+PRO 275 P 0.34
+SER 276 S 0.74
+LEU 277 L 0.41
+GLU 278 E 0.78
+GLU 279 E 0.76
+ILE 280 I 0.40
+LEU 281 L 0.27
+LEU 282 L 0.23
+ASP 283 D 0.65
+PRO 284 P 0.45
+TRP 285 W 0.72
+MET 286 M 0.57
+GLN 287 Q 0.29

4BDU.cif ADDED Viewed

The diff for this file is too large to render. See raw diff

4BDU.pdb ADDED Viewed

The diff for this file is too large to render. See raw diff

4BDU_A_scored.pdb ADDED Viewed

The diff for this file is too large to render. See raw diff

4BDU_C_scored.pdb ADDED Viewed

The diff for this file is too large to render. See raw diff

4BDU_predictions.txt ADDED Viewed

	@@ -0,0 +1,300 @@

+Prediction for PDB: 4BDU, Chain: A
+Date: 2024-12-11 16:57:50
+Columns: Residue Name, Residue Number, One-letter Code, Normalized Score
+SER 2 S 0.05
+LYS 3 K 0.39
+GLY 4 G 0.24
+GLU 5 E 0.26
+GLU 6 E 0.35
+LEU 7 L 0.45
+PHE 8 F 0.82
+THR 9 T 0.32
+GLY 10 G 0.73
+VAL 11 V 0.42
+VAL 12 V 0.33
+PRO 13 P 0.96
+ILE 14 I 0.68
+LEU 15 L 0.71
+VAL 16 V 0.84
+GLU 17 E 0.26
+LEU 18 L 0.54
+ASP 19 D 0.46
+GLY 20 G 0.12
+ASP 21 D 0.57
+VAL 22 V 0.32
+ASN 23 N 0.18
+GLY 24 G 0.48
+HIS 25 H 0.95
+LYS 26 K 0.88
+PHE 27 F 0.13
+SER 28 S 0.12
+VAL 29 V 0.58
+SER 30 S 0.19
+GLY 31 G 0.09
+GLU 32 E 0.17
+GLY 33 G 0.60
+GLU 34 E 0.92
+GLY 35 G 0.48
+ASP 36 D 0.35
+ALA 37 A 0.72
+THR 38 T 0.47
+TYR 39 Y 0.11
+GLY 40 G 0.57
+LYS 41 K 0.86
+LEU 42 L 0.42
+THR 43 T 0.98
+LEU 44 L 0.27
+LYS 45 K 0.05
+PHE 46 F 0.54
+ILE 47 I 0.25
+CYS 48 C 0.73
+THR 49 T 0.44
+THR 50 T 0.85
+GLY 51 G 0.17
+LYS 52 K 0.72
+LEU 53 L 0.03
+PRO 54 P 0.26
+VAL 55 V 0.64
+PRO 56 P 0.88
+TRP 57 W 0.84
+PRO 58 P 0.71
+THR 59 T 0.41
+LEU 60 L 0.18
+VAL 61 V 0.32
+THR 62 T 0.87
+THR 63 T 0.87
+PHE 64 F 1.00
+VAL 68 V 0.50
+GLN 69 Q 0.10
+CYS 70 C 0.71
+PHE 71 F 0.47
+SER 72 S 0.46
+ARG 73 R 0.99
+TYR 74 Y 0.40
+PRO 75 P 0.78
+ASP 76 D 0.42
+HIS 77 H 0.93
+MET 78 M 0.47
+LYS 79 K 0.51
+GLN 80 Q 0.85
+HIS 81 H 0.11
+ASP 82 D 0.87
+PHE 83 F 0.13
+PHE 84 F 0.56
+LYS 85 K 0.44
+SER 86 S 0.44
+ALA 87 A 0.20
+MET 88 M 0.33
+PRO 89 P 0.77
+GLU 90 E 0.32
+GLY 91 G 0.80
+TYR 92 Y 0.52
+VAL 93 V 0.46
+GLN 94 Q 0.26
+GLU 95 E 0.03
+ARG 96 R 0.99
+THR 97 T 0.72
+ILE 98 I 0.38
+PHE 99 F 0.63
+PHE 100 F 0.03
+LYS 101 K 0.10
+ASP 102 D 0.52
+ASP 103 D 0.41
+GLY 104 G 0.91
+ASN 105 N 0.17
+TYR 106 Y 0.75
+LYS 107 K 0.07
+THR 108 T 0.78
+ARG 109 R 0.21
+ALA 110 A 0.93
+GLU 111 E 0.34
+VAL 112 V 0.06
+LYS 113 K 0.92
+PHE 114 F 0.43
+GLU 115 E 0.22
+GLY 116 G 0.67
+ASP 117 D 0.54
+THR 118 T 0.18
+LEU 119 L 0.33
+VAL 120 V 0.52
+ASN 121 N 0.23
+ARG 122 R 0.18
+ILE 123 I 0.52
+GLU 124 E 0.85
+LEU 125 L 0.66
+LYS 126 K 0.69
+GLY 127 G 0.46
+ILE 128 I 0.48
+ASP 129 D 0.55
+PHE 130 F 0.90
+LYS 131 K 1.00
+GLU 132 E 0.98
+ASP 133 D 0.41
+GLY 134 G 0.78
+ASN 135 N 0.12
+ILE 136 I 0.06
+LEU 137 L 0.80
+GLY 138 G 0.70
+HIS 139 H 0.52
+LYS 140 K 0.40
+LEU 141 L 0.97
+GLU 142 E 0.25
+TYR 143 Y 0.53
+ASN 144 N 0.26
+TYR 145 Y 0.67
+ASN 146 N 0.65
+SER 147 S 0.91
+HIS 148 H 0.82
+ASN 149 N 0.93
+VAL 150 V 0.67
+TYR 151 Y 0.87
+ILE 152 I 0.02
+MET 153 M 0.37
+ALA 154 A 0.50
+ASP 155 D 0.89
+LYS 156 K 1.00
+GLN 157 Q 0.96
+LYS 158 K 0.83
+ASN 159 N 0.95
+GLY 160 G 0.02
+ILE 161 I 0.57
+LYS 162 K 0.82
+VAL 163 V 0.66
+ASN 164 N 0.32
+PHE 165 F 0.50
+LYS 166 K 0.11
+ILE 167 I 0.49
+ARG 168 R 0.20
+HIS 169 H 0.82
+ASN 170 N 0.34
+ILE 171 I 0.91
+GLU 172 E 0.28
+ASP 173 D 0.02
+GLY 174 G 0.09
+SER 175 S 0.44
+VAL 176 V 0.87
+GLN 177 Q 0.65
+LEU 178 L 0.88
+ALA 179 A 0.89
+ASP 180 D 0.53
+HIS 181 H 0.89
+TYR 182 Y 0.44
+GLN 183 Q 0.02
+GLN 184 Q 0.91
+ASN 185 N 0.57
+THR 186 T 0.00
+PRO 187 P 0.97
+ILE 188 I 0.17
+GLY 189 G 0.57
+ASP 190 D 0.46
+GLY 191 G 0.08
+PRO 192 P 0.85
+VAL 193 V 0.09
+LEU 194 L 0.79
+LEU 195 L 0.61
+PRO 196 P 0.72
+ASP 197 D 0.29
+ASN 198 N 0.95
+HIS 199 H 0.78
+TYR 200 Y 0.02
+LEU 201 L 0.55
+SER 202 S 0.63
+THR 203 T 0.38
+GLN 204 Q 0.18
+SER 205 S 0.48
+ASN 206 N 0.19
+LEU 207 L 0.71
+SER 208 S 0.56
+LYS 209 K 0.56
+ASP 210 D 0.98
+PRO 211 P 0.43
+ASN 212 N 0.91
+GLU 213 E 0.76
+LYS 214 K 0.58
+ARG 215 R 0.42
+ASP 216 D 0.81
+HIS 217 H 0.96
+MET 218 M 0.26
+VAL 219 V 0.01
+LEU 220 L 0.27
+LEU 221 L 0.26
+GLU 222 E 0.92
+PHE 223 F 0.84
+VAL 224 V 0.72
+THR 225 T 1.00
+ALA 226 A 0.55
+ALA 227 A 0.72
+GLY 228 G 0.44
+ILE 229 I 0.01
+THR 230 T 0.98
+ALA 1054 A 0.83
+SER 1055 S 0.78
+THR 1056 T 0.55
+LYS 1057 K 0.40
+LYS 1058 K 0.06
+LEU 1059 L 0.82
+SER 1060 S 0.59
+GLU 1061 E 0.68
+SER 1062 S 0.28
+LEU 1063 L 0.79
+LYS 1064 K 0.94
+ARG 1065 R 0.32
+ILE 1066 I 0.28
+GLY 1067 G 0.94
+ASP 1068 D 0.19
+GLU 1069 E 0.76
+LEU 1070 L 0.19
+ASP 1071 D 0.14
+SER 1072 S 0.04
+ASN 1073 N 0.39
+MET 1074 M 0.50
+GLU 1075 E 0.92
+LEU 1076 L 0.81
+GLN 1077 Q 0.04
+ARG 1078 R 0.97
+MET 1079 M 0.20
+ILE 1080 I 0.90
+ALA 1081 A 0.43
+ALA 1082 A 0.93
+VAL 1083 V 0.28
+ASP 1084 D 0.29
+THR 1085 T 0.83
+ASP 1086 D 0.79
+SER 1087 S 0.39
+PRO 1088 P 0.85
+ARG 1089 R 0.41
+GLU 1090 E 0.08
+VAL 1091 V 0.10
+PHE 1092 F 0.15
+PHE 1093 F 0.10
+ARG 1094 R 0.59
+VAL 1095 V 0.69
+ALA 1096 A 0.50
+ALA 1097 A 0.86
+ASP 1098 D 0.77
+MET 1099 M 0.60
+PHE 1100 F 0.13
+SER 1101 S 0.22
+ASP 1102 D 0.29
+GLY 1103 G 0.22
+ASN 1104 N 0.01
+PHE 1105 F 0.24
+ASN 1106 N 0.48
+TRP 1107 W 0.45
+GLY 1108 G 0.52
+ARG 1109 R 0.86
+VAL 1110 V 0.68
+VAL 1111 V 0.96
+ALA 1112 A 0.01
+LEU 1113 L 0.88
+PHE 1114 F 0.66
+TYR 1115 Y 0.11
+PHE 1116 F 0.62
+ALA 1117 A 0.62
+SER 1118 S 0.26
+LYS 1119 K 0.58
+LEU 1120 L 0.18
+VAL 1121 V 0.85
+LEU 1122 L 0.27

app.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import gradio as gr
 import requests
-from Bio.PDB import PDBParser
 import numpy as np
 import os
 from gradio_molecule3d import Molecule3D
@@ -25,6 +28,8 @@ from datasets import Dataset
 from scipy.special import expit
 # Load model and move to device
 checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
 max_length = 1500
@@ -37,119 +42,250 @@ def normalize_scores(scores):
     min_score = np.min(scores)
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
 def read_mol(pdb_path):
     """Read PDB file and return its content as a string"""
     with open(pdb_path, 'r') as f:
         return f.read()
-def fetch_pdb(pdb_id):
-    pdb_url = f'https://files.rcsb.org/download/{pdb_id}.pdb'
-    pdb_path = f'{pdb_id}.pdb'
-    response = requests.get(pdb_url)
-    if response.status_code == 200:
-        with open(pdb_path, 'wb') as f:
-            f.write(response.content)
-        return pdb_path
     else:
         return None
-def process_pdb(pdb_id, segment):
-    pdb_path = fetch_pdb(pdb_id)
     if not pdb_path:
-        return "Failed to fetch PDB file", None, None
-    parser = PDBParser(QUIET=1)
     structure = parser.get_structure('protein', pdb_path)
     try:
         chain = structure[0][segment]
     except KeyError:
         return "Invalid Chain ID", None, None
-    aa_dict = {
-        'ALA': 'A', 'CYS': 'C', 'ASP': 'D', 'GLU': 'E', 'PHE': 'F',
-        'GLY': 'G', 'HIS': 'H', 'ILE': 'I', 'LYS': 'K', 'LEU': 'L',
-        'MET': 'M', 'ASN': 'N', 'PRO': 'P', 'GLN': 'Q', 'ARG': 'R',
-        'SER': 'S', 'THR': 'T', 'VAL': 'V', 'TRP': 'W', 'TYR': 'Y',
-        'MSE': 'M', 'SEP': 'S', 'TPO': 'T', 'CSO': 'C', 'PTR': 'Y', 'HYP': 'P'
-    }
-    # Exclude non-amino acid residues
-    sequence = "".join(
-        aa_dict[residue.get_resname().strip()]
-        for residue in chain
-        if residue.get_resname().strip() in aa_dict
-    )
-    sequence2 = [
-        (res.id[1], res) for res in chain
-        if res.get_resname().strip() in aa_dict
-    ]
     # Prepare input for model prediction
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
     # Calculate scores and normalize them
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
-    # Zip residues with scores to track the residue ID and score
-    residue_scores = [(resi, score) for (resi, _), score in zip(sequence2, normalized_scores)]
-    result_str = "\n".join([
-        f"{res.get_resname()} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
-        for i, res in enumerate(chain) if res.get_resname().strip() in aa_dict
-    ])
-    # Save the predictions to a file
     prediction_file = f"{pdb_id}_predictions.txt"
     with open(prediction_file, "w") as f:
         f.write(result_str)
-    return result_str, molecule(pdb_path, residue_scores, segment), prediction_file
 def molecule(input_pdb, residue_scores=None, segment='A'):
     mol = read_mol(input_pdb)  # Read PDB file content
     # Prepare high-scoring residues script if scores are provided
     high_score_script = ""
     if residue_scores is not None:
-        # Sort residues based on their scores
         high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
         mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
         high_score_script = """
-        // Reset all styles first
-        viewer.getModel(0).setStyle({}, {});
-        // Show only the selected chain
-        viewer.getModel(0).setStyle(
             {"chain": "%s"},
-            { cartoon: {colorscheme:"whiteCarbon"} }
         );
-        // Highlight high-scoring residues only for the selected chain
-        let highScoreResidues = [%s];
-        viewer.getModel(0).setStyle(
-            {"chain": "%s", "resi": highScoreResidues},
             {"stick": {"color": "red"}}
         );
-        // Highlight medium-scoring residues only for the selected chain
-        let midScoreResidues = [%s];
-        viewer.getModel(0).setStyle(
-            {"chain": "%s", "resi": midScoreResidues},
             {"stick": {"color": "orange"}}
         );
-        """ % (segment,
-               ", ".join(str(resi) for resi in high_score_residues),
-               segment,
-               ", ".join(str(resi) for resi in mid_score_residues),
-               segment)
     html_content = f"""
     <!DOCTYPE html>
     <html>
@@ -173,13 +309,6 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
                 let element = $("#container");
                 let config = {{ backgroundColor: "white" }};
                 let viewer = $3Dmol.createViewer(element, config);
-                viewer.addModel(pdb, "pdb");
-                // Reset all styles and show only selected chain
-                viewer.getModel(0).setStyle(
-                    {{"chain": "{segment}"}},
-                    {{ cartoon: {{ colorscheme:"whiteCarbon" }} }}
-                );
                 {high_score_script}
@@ -221,39 +350,50 @@ def molecule(input_pdb, residue_scores=None, segment='A'):
     # Return the HTML content within an iframe safely encoded for special characters
     return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
-reps =    [
-        {
-          "model": 0,
-          "style": "cartoon",
-          "color": "whiteCarbon",
-          "residue_range": "",
-          "around": 0,
-          "byres": False,
-        }
-    ]
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")
     with gr.Row():
-        pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
         visualize_btn = gr.Button("Visualize Structure")
-    molecule_output2 = Molecule3D(label="Protein Structure", reps=reps)
     with gr.Row():
-        #pdb_input = gr.Textbox(value="2IWI", label="PDB ID", placeholder="Enter PDB ID here...")
         segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
         prediction_btn = gr.Button("Predict Binding Site")
     molecule_output = gr.HTML(label="Protein Structure")
     predictions_output = gr.Textbox(label="Binding Site Predictions")
-    download_output = gr.File(label="Download Predictions")
-    visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)
-    prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])
     gr.Markdown("## Examples")
     gr.Examples(
         examples=[

 import gradio as gr
 import requests
+from Bio.PDB import PDBParser, MMCIFParser, PDBIO
+from Bio.PDB.Polypeptide import is_aa
+from Bio.SeqUtils import seq1
+from typing import Optional, Tuple
 import numpy as np
 import os
 from gradio_molecule3d import Molecule3D
 from scipy.special import expit
 # Load model and move to device
 checkpoint = 'ThorbenF/prot_t5_xl_uniref50'
 max_length = 1500
     min_score = np.min(scores)
     max_score = np.max(scores)
     return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores
 def read_mol(pdb_path):
     """Read PDB file and return its content as a string"""
     with open(pdb_path, 'r') as f:
         return f.read()
+def fetch_structure(pdb_id: str, output_dir: str = ".") -> Optional[str]:
+    """
+    Fetch the structure file for a given PDB ID. Prioritizes CIF files.
+    If a structure file already exists locally, it uses that.
+    """
+    file_path = download_structure(pdb_id, output_dir)
+    if file_path:
+        return file_path
     else:
         return None
+def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:
+    """
+    Attempt to download the structure file in CIF or PDB format.
+    Returns the path to the downloaded file, or None if download fails.
+    """
+    for ext in ['.cif', '.pdb']:
+        file_path = os.path.join(output_dir, f"{pdb_id}{ext}")
+        if os.path.exists(file_path):
+            return file_path
+        url = f"https://files.rcsb.org/download/{pdb_id}{ext}"
+        try:
+            response = requests.get(url, timeout=10)
+            if response.status_code == 200:
+                with open(file_path, 'wb') as f:
+                    f.write(response.content)
+                return file_path
+        except Exception as e:
+            print(f"Download error for {pdb_id}{ext}: {e}")
+    return None
+def convert_cif_to_pdb(cif_path: str, output_dir: str = ".") -> str:
+    """
+    Convert a CIF file to PDB format using BioPython and return the PDB file path.
+    """
+    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))
+    parser = MMCIFParser(QUIET=True)
+    structure = parser.get_structure('protein', cif_path)
+    io = PDBIO()
+    io.set_structure(structure)
+    io.save(pdb_path)
+    return pdb_path
+def fetch_pdb(pdb_id):
+    pdb_path = fetch_structure(pdb_id)
     if not pdb_path:
+        return None
+    _, ext = os.path.splitext(pdb_path)
+    if ext == '.cif':
+        pdb_path = convert_cif_to_pdb(pdb_path)
+    return pdb_path
+def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:
+    """
+    Create a PDB file with only the specified chain and replace B-factor with prediction scores
+    """
+    # Read the original PDB file
+    parser = PDBParser(QUIET=True)
+    structure = parser.get_structure('protein', input_pdb)
+    # Prepare a new structure with only the specified chain
+    new_structure = structure.copy()
+    for model in new_structure:
+        # Remove all chains except the specified one
+        chains_to_remove = [chain for chain in model if chain.id != chain_id]
+        for chain in chains_to_remove:
+            model.detach_child(chain.id)
+    # Create a modified PDB with scores in B-factor
+    scores_dict = {resi: score for resi, score in residue_scores}
+    for model in new_structure:
+        for chain in model:
+            for residue in chain:
+                if residue.id[1] in scores_dict:
+                    for atom in residue:
+                        atom.bfactor = scores_dict[residue.id[1]] #* 100  # Scale score to B-factor range
+    # Save the modified structure
+    output_pdb = f"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb"
+    io = PDBIO()
+    io.set_structure(new_structure)
+    io.save(output_pdb)
+    return output_pdb
+def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):
+    """
+    Calculate the geometric center of high-scoring residues
+    """
+    parser = PDBParser(QUIET=True)
     structure = parser.get_structure('protein', pdb_path)
+    # Collect coordinates of CA atoms from high-scoring residues
+    coords = []
+    for model in structure:
+        for chain in model:
+            if chain.id == chain_id:
+                for residue in chain:
+                    if residue.id[1] in high_score_residues:
+                        if 'CA' in residue:  # Use alpha carbon as representative
+                            ca_atom = residue['CA']
+                            coords.append(ca_atom.coord)
+    # Calculate geometric center
+    if coords:
+        center = np.mean(coords, axis=0)
+        return center
+    return None
+def process_pdb(pdb_id_or_file, segment):
+    # Determine if input is a PDB ID or file path
+    if pdb_id_or_file.endswith('.pdb'):
+        pdb_path = pdb_id_or_file
+        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]
+    else:
+        pdb_id = pdb_id_or_file
+        pdb_path = fetch_pdb(pdb_id)
+    if not pdb_path:
+        return "Failed to fetch PDB file", None, None
+    # Determine the file format and choose the appropriate parser
+    _, ext = os.path.splitext(pdb_path)
+    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)
+    try:
+        # Parse the structure file
+        structure = parser.get_structure('protein', pdb_path)
+    except Exception as e:
+        return f"Error parsing structure file: {e}", None, None
+    # Extract the specified chain
     try:
         chain = structure[0][segment]
     except KeyError:
         return "Invalid Chain ID", None, None
+    protein_residues = [res for res in chain if is_aa(res)]
+    sequence = "".join(seq1(res.resname) for res in protein_residues)
+    sequence_id = [res.id[1] for res in protein_residues]
     # Prepare input for model prediction
     input_ids = tokenizer(" ".join(sequence), return_tensors="pt").input_ids.to(device)
     with torch.no_grad():
         outputs = model(input_ids).logits.detach().cpu().numpy().squeeze()
     # Calculate scores and normalize them
     scores = expit(outputs[:, 1] - outputs[:, 0])
     normalized_scores = normalize_scores(scores)
+    # Zip residues with scores to track the residue ID and score
+    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]
+    # Identify high and mid scoring residues
+    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
+    mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
+    # Calculate geometric center of high-scoring residues
+    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)
+    pymol_selection = f"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}"
+    pymol_center_cmd = f"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}" if geo_center is not None else ""
+    # Generate the result string
+    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    result_str = f"Prediction for PDB: {pdb_id}, Chain: {segment}\nDate: {current_time}\n\n"
+    result_str += "Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\n\n"
+    result_str += "\n".join([
+        f"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}"
+        for i, res in enumerate(protein_residues)])
+    # Create prediction and scored PDB files
     prediction_file = f"{pdb_id}_predictions.txt"
     with open(prediction_file, "w") as f:
         f.write(result_str)
+    # Create chain-specific PDB with scores in B-factor
+    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)
+    # Molecule visualization with updated script
+    mol_vis = molecule(pdb_path, residue_scores, segment)
+    # Construct PyMOL command suggestions
+    pymol_commands = f"""
+PyMOL Visualization Commands:
+1. Load PDB: load {os.path.abspath(pdb_path)}
+2. Select high-scoring residues: {pymol_selection}
+3. Highlight high-scoring residues: show sticks, high_score_residues
+{pymol_center_cmd}
+"""
+    return result_str + "\n\n" + pymol_commands, mol_vis, [prediction_file, scored_pdb]
 def molecule(input_pdb, residue_scores=None, segment='A'):
     mol = read_mol(input_pdb)  # Read PDB file content
     # Prepare high-scoring residues script if scores are provided
     high_score_script = ""
     if residue_scores is not None:
+        # Filter residues based on their scores
         high_score_residues = [resi for resi, score in residue_scores if score > 0.75]
         mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]
         high_score_script = """
+        // Load the original model and apply white cartoon style
+        let chainModel = viewer.addModel(pdb, "pdb");
+        chainModel.setStyle({}, {});
+        chainModel.setStyle(
             {"chain": "%s"},
+            {"cartoon": {"color": "white"}}
         );
+        // Create a new model for high-scoring residues and apply red sticks style
+        let highScoreModel = viewer.addModel(pdb, "pdb");
+        highScoreModel.setStyle({}, {});
+        highScoreModel.setStyle(
+            {"chain": "%s", "resi": [%s]},
             {"stick": {"color": "red"}}
         );
+        // Create a new model for medium-scoring residues and apply orange sticks style
+        let midScoreModel = viewer.addModel(pdb, "pdb");
+        midScoreModel.setStyle({}, {});
+        midScoreModel.setStyle(
+            {"chain": "%s", "resi": [%s]},
             {"stick": {"color": "orange"}}
         );
+        """ % (
+            segment,
+            segment,
+            ", ".join(str(resi) for resi in high_score_residues),
+            segment,
+            ", ".join(str(resi) for resi in mid_score_residues)
+        )
+    # Generate the full HTML content
     html_content = f"""
     <!DOCTYPE html>
     <html>
                 let element = $("#container");
                 let config = {{ backgroundColor: "white" }};
                 let viewer = $3Dmol.createViewer(element, config);
                 {high_score_script}
     # Return the HTML content within an iframe safely encoded for special characters
     return f'<iframe width="100%" height="700" srcdoc="{html_content.replace(chr(34), "&quot;").replace(chr(39), "&#39;")}"></iframe>'
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# Protein Binding Site Prediction")
     with gr.Row():
+        pdb_input = gr.Textbox(value="4BDU", label="PDB ID", placeholder="Enter PDB ID here...")
         visualize_btn = gr.Button("Visualize Structure")
+    molecule_output2 = Molecule3D(label="Protein Structure", reps=[
+        {
+            "model": 0,
+            "style": "cartoon",
+            "color": "whiteCarbon",
+            "residue_range": "",
+            "around": 0,
+            "byres": False,
+        }
+    ])
     with gr.Row():
         segment_input = gr.Textbox(value="A", label="Chain ID", placeholder="Enter Chain ID here...")
         prediction_btn = gr.Button("Predict Binding Site")
     molecule_output = gr.HTML(label="Protein Structure")
     predictions_output = gr.Textbox(label="Binding Site Predictions")
+    download_output = gr.File(label="Download Files", file_count="multiple")
+    prediction_btn.click(
+        process_pdb,
+        inputs=[
+            pdb_input,
+            segment_input
+        ],
+        outputs=[predictions_output, molecule_output, download_output]
+    )
+    visualize_btn.click(
+        fetch_pdb,
+        inputs=[pdb_input],
+        outputs=molecule_output2
+    )
     gr.Markdown("## Examples")
     gr.Examples(
         examples=[

test3.ipynb ADDED Viewed

	@@ -0,0 +1,1599 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "2b84eb4e-3f91-4a28-8e4f-322a34a9fb55",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7877\n",
+      "* Running on public URL: https://a35567ec94eccaf8d1.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://a35567ec94eccaf8d1.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from Bio.PDB import PDBParser, MMCIFParser, MMCIF2Dict, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import requests\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from scipy.special import expit\n",
+    "from typing import Optional\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def process_pdb(pdb_id, segment):\n",
+    "    # Fetch the PDB or CIF file\n",
+    "    pdb_path = fetch_pdb(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    result_str = \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Save the predictions to a file\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "\n",
+    "    return result_str, molecule(pdb_path, residue_scores, segment), prediction_file\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "    \n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Sort residues based on their scores\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Reset all styles first\n",
+    "        viewer.getModel(0).setStyle({}, {});\n",
+    "        \n",
+    "        // Show only the selected chain\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            { cartoon: {colorscheme:\"whiteCarbon\"} }\n",
+    "        );\n",
+    "        \n",
+    "        // Highlight high-scoring residues only for the selected chain\n",
+    "        let highScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": highScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"red\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Highlight medium-scoring residues only for the selected chain\n",
+    "        let midScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": midScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"orange\"}}\n",
+    "        );\n",
+    "        \"\"\" % (segment, \n",
+    "               \", \".join(str(resi) for resi in high_score_residues),\n",
+    "               segment,\n",
+    "               \", \".join(str(resi) for resi in mid_score_residues),\n",
+    "               segment)\n",
+    "    \n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                viewer.addModel(pdb, \"pdb\");\n",
+    "                \n",
+    "                // Reset all styles and show only selected chain\n",
+    "                viewer.getModel(0).setStyle(\n",
+    "                    {{\"chain\": \"{segment}\"}}, \n",
+    "                    {{ cartoon: {{ colorscheme:\"whiteCarbon\" }} }}\n",
+    "                );\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Return the HTML content within an iframe safely encoded for special characters\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "reps = [\n",
+    "    {\n",
+    "        \"model\": 0,\n",
+    "        \"style\": \"cartoon\",\n",
+    "        \"color\": \"whiteCarbon\",\n",
+    "        \"residue_range\": \"\",\n",
+    "        \"around\": 0,\n",
+    "        \"byres\": False,\n",
+    "    }\n",
+    "]\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=reps)\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Predictions\")\n",
+    "    \n",
+    "    visualize_btn.click(fetch_pdb, inputs=[pdb_input], outputs=molecule_output2)\n",
+    "    \n",
+    "    prediction_btn.click(process_pdb, inputs=[pdb_input, segment_input], outputs=[predictions_output, molecule_output, download_output])\n",
+    "    \n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "a2f1ca04-7a27-4e4f-b44d-39b20c5d034a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7878\n",
+      "* Running on public URL: https://fbfb00e893a2d7c6ae.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://fbfb00e893a2d7c6ae.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import requests\n",
+    "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from typing import Optional, Tuple\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    \"\"\"Min-max scale ``scores`` into the range [0, 1].\n",
+    "\n",
+    "    Empty input is returned as an empty array. Input whose values are all\n",
+    "    equal is returned unscaled, since there is no range to divide by.\n",
+    "    \"\"\"\n",
+    "    scores = np.asarray(scores)\n",
+    "    # np.min / np.max raise ValueError on a zero-size array.\n",
+    "    if scores.size == 0:\n",
+    "        return scores\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    if max_score > min_score:\n",
+    "        return (scores - min_score) / (max_score - min_score)\n",
+    "    return scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Return the full text of the file at ``pdb_path``.\"\"\"\n",
+    "    with open(pdb_path, 'r') as handle:\n",
+    "        contents = handle.read()\n",
+    "    return contents\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Return the path of the structure file for ``pdb_id``, or None.\n",
+    "\n",
+    "    All work is delegated to ``download_structure``; any falsy result from it\n",
+    "    is reported as None.\n",
+    "    \"\"\"\n",
+    "    return download_structure(pdb_id, output_dir) or None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Locate or download the structure file for ``pdb_id`` in CIF or PDB format.\n",
+    "\n",
+    "    Files already present in ``output_dir`` are preferred over the network, so a\n",
+    "    cached ``.pdb`` is used even when no ``.cif`` is cached. Otherwise each format\n",
+    "    is requested from files.rcsb.org, CIF first.\n",
+    "\n",
+    "    Returns the path to the local file, or None if nothing could be obtained.\n",
+    "    \"\"\"\n",
+    "    extensions = ['.cif', '.pdb']\n",
+    "    candidates = [os.path.join(output_dir, f\"{pdb_id}{ext}\") for ext in extensions]\n",
+    "\n",
+    "    # Look at every cached copy before touching the network.\n",
+    "    for file_path in candidates:\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "\n",
+    "    for ext, file_path in zip(extensions, candidates):\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except (requests.RequestException, OSError) as e:\n",
+    "            # Best effort: report the failure and fall through to the next format.\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "\n",
+    "    The output is written to ``output_dir`` under the input's base name with its\n",
+    "    final extension replaced by ``.pdb``.\n",
+    "    \"\"\"\n",
+    "    # splitext only touches the final extension; str.replace would also rewrite\n",
+    "    # any '.cif' occurring earlier in the file name.\n",
+    "    stem = os.path.splitext(os.path.basename(cif_path))[0]\n",
+    "    pdb_path = os.path.join(output_dir, f\"{stem}.pdb\")\n",
+    "    structure = MMCIFParser(QUIET=True).get_structure('protein', cif_path)\n",
+    "    writer = PDBIO()\n",
+    "    writer.set_structure(structure)\n",
+    "    writer.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    \"\"\"Return a local PDB-format path for ``pdb_id``, or None if unavailable.\n",
+    "\n",
+    "    A structure obtained as ``.cif`` is converted with ``convert_cif_to_pdb``.\n",
+    "    \"\"\"\n",
+    "    structure_path = fetch_structure(pdb_id)\n",
+    "    if not structure_path:\n",
+    "        return None\n",
+    "    if os.path.splitext(structure_path)[1] == '.cif':\n",
+    "        return convert_cif_to_pdb(structure_path)\n",
+    "    return structure_path\n",
+    "\n",
+    "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
+    "    \"\"\"\n",
+    "    Write a copy of ``input_pdb`` holding only chain ``chain_id``, with per-residue\n",
+    "    scores stored as the atoms' B-factors, and return the new file's path.\n",
+    "\n",
+    "    ``residue_scores`` is a list of ``(residue_number, score)`` pairs; residues whose\n",
+    "    number is not listed keep their original B-factor.\n",
+    "    \"\"\"\n",
+    "    source = PDBParser(QUIET=True).get_structure('protein', input_pdb)\n",
+    "    score_by_resi = dict(residue_scores)\n",
+    "\n",
+    "    # Edit a copy rather than the structure that was just parsed\n",
+    "    trimmed = source.copy()\n",
+    "    for model in trimmed:\n",
+    "        # Collect the ids first so the model is not modified while being iterated\n",
+    "        unwanted_ids = [chain.id for chain in model if chain.id != chain_id]\n",
+    "        for unwanted in unwanted_ids:\n",
+    "            model.detach_child(unwanted)\n",
+    "\n",
+    "        for chain in model:\n",
+    "            for residue in chain:\n",
+    "                resi = residue.id[1]\n",
+    "                if resi not in score_by_resi:\n",
+    "                    continue\n",
+    "                for atom in residue:\n",
+    "                    atom.bfactor = score_by_resi[resi]\n",
+    "\n",
+    "    base, _ = os.path.splitext(input_pdb)\n",
+    "    output_pdb = f\"{base}_{chain_id}_scored.pdb\"\n",
+    "    writer = PDBIO()\n",
+    "    writer.set_structure(trimmed)\n",
+    "    writer.save(output_pdb)\n",
+    "\n",
+    "    return output_pdb\n",
+    "\n",
+    "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
+    "    \"\"\"\n",
+    "    Calculate the geometric center of high-scoring residues.\n",
+    "\n",
+    "    The center is the mean of the CA atom coordinates of every residue in chain\n",
+    "    ``chain_id`` (across all models in the file) whose residue number is in\n",
+    "    ``high_score_residues``. Returns None when no such CA atom exists.\n",
+    "    \"\"\"\n",
+    "    structure = PDBParser(QUIET=True).get_structure('protein', pdb_path)\n",
+    "    # A set makes the per-residue membership test O(1) instead of a list scan.\n",
+    "    wanted = set(high_score_residues)\n",
+    "\n",
+    "    # Use the alpha carbon as the representative atom of each residue\n",
+    "    coords = [\n",
+    "        residue['CA'].coord\n",
+    "        for model in structure\n",
+    "        for chain in model\n",
+    "        if chain.id == chain_id\n",
+    "        for residue in chain\n",
+    "        if residue.id[1] in wanted and 'CA' in residue\n",
+    "    ]\n",
+    "\n",
+    "    if not coords:\n",
+    "        return None\n",
+    "    return np.mean(coords, axis=0)\n",
+    "\n",
+    "def process_pdb(pdb_id_or_file, segment):\n",
+    "    \"\"\"\n",
+    "    Score the residues of one chain and build the prediction outputs.\n",
+    "\n",
+    "    Args:\n",
+    "        pdb_id_or_file: A PDB ID to fetch, or a path ending in ``.pdb``.\n",
+    "        segment: ID of the chain to score.\n",
+    "\n",
+    "    Returns:\n",
+    "        A ``(text, html, files)`` tuple: the prediction report, the viewer HTML from\n",
+    "        ``molecule``, and the list ``[prediction_file, scored_pdb]``. On failure\n",
+    "        ``text`` is an error message and the other two items are None.\n",
+    "\n",
+    "    NOTE: the scores are random placeholders (``np.random.rand``), not the output\n",
+    "    of a prediction model.\n",
+    "    \"\"\"\n",
+    "    # An empty textbox or missing upload arrives as '' or None\n",
+    "    if not pdb_id_or_file:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "\n",
+    "    # Determine if input is a PDB ID or file path\n",
+    "    if pdb_id_or_file.endswith('.pdb'):\n",
+    "        pdb_path = pdb_id_or_file\n",
+    "        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
+    "    else:\n",
+    "        pdb_id = pdb_id_or_file\n",
+    "        pdb_path = fetch_pdb(pdb_id)\n",
+    "    \n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain from the first model\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    if not protein_residues:\n",
+    "        # Nothing to score: an empty score array cannot be min-max normalized\n",
+    "        return \"No amino acid residues found in chain\", None, None\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Pair each residue number with its score\n",
+    "    residue_scores = list(zip(sequence_id, normalized_scores))\n",
+    "\n",
+    "    # Identify high scoring residues\n",
+    "    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "\n",
+    "    # Calculate geometric center of high-scoring residues\n",
+    "    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
+    "    if high_score_residues:\n",
+    "        resi_expr = '+'.join(map(str, high_score_residues))\n",
+    "        pymol_selection = f\"select high_score_residues, resi {resi_expr} and chain {segment}\"\n",
+    "        pymol_center_cmd = f\"show spheres, resi {resi_expr} and chain {segment}\" if geo_center is not None else \"\"\n",
+    "    else:\n",
+    "        # An empty residue list would give the malformed selection 'resi  and chain X'\n",
+    "        pymol_selection = \"select high_score_residues, none\"\n",
+    "        pymol_center_cmd = \"\"\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
+    "    result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
+    "    result_str += \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Write the prediction report next to the working directory\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    # Create chain-specific PDB with scores in B-factor\n",
+    "    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
+    "\n",
+    "    # Build the embedded 3D viewer for the scored chain\n",
+    "    mol_vis = molecule(pdb_path, residue_scores, segment)\n",
+    "\n",
+    "    # Construct PyMOL command suggestions\n",
+    "    pymol_commands = f\"\"\"\n",
+    "PyMOL Visualization Commands:\n",
+    "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
+    "2. Select high-scoring residues: {pymol_selection}\n",
+    "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
+    "{pymol_center_cmd}\n",
+    "\"\"\"\n",
+    "    \n",
+    "    return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
+    "\n",
+    "# Build the embeddable 3Dmol.js viewer: the chain's cartoon style is applied first,\n",
+    "# then stick styles for the high- and medium-scoring residues.\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    \"\"\"\n",
+    "    Build an iframe embedding a 3Dmol.js viewer for ``input_pdb``.\n",
+    "\n",
+    "    Args:\n",
+    "        input_pdb: Path to a PDB-format file whose text is embedded in the page.\n",
+    "        residue_scores: Optional iterable of ``(residue_number, score)`` pairs.\n",
+    "            Residues scoring above 0.75 are drawn as red sticks and those in\n",
+    "            (0.5, 0.75] as orange sticks. When None, no styling script is emitted.\n",
+    "        segment: ID of the chain to display.\n",
+    "\n",
+    "    Returns:\n",
+    "        An ``<iframe>`` HTML string whose ``srcdoc`` attribute holds the viewer page.\n",
+    "    \"\"\"\n",
+    "    # Function-scope imports keep this block self-contained\n",
+    "    import html\n",
+    "    import json\n",
+    "\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "\n",
+    "    # json.dumps yields a quoted literal that is also valid JavaScript, escaping the\n",
+    "    # quotes, backslashes and newlines a raw template literal would pass through.\n",
+    "    # '</' is split with a backslash so the file text cannot close the <script> tag.\n",
+    "    pdb_js = json.dumps(mol).replace(\"</\", \"<\" + chr(92) + \"/\")\n",
+    "    chain_js = json.dumps(segment)\n",
+    "    \n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Bucket residues by score threshold\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Reset all styles first\n",
+    "        viewer.getModel(0).setStyle({}, {});\n",
+    "        \n",
+    "        // First, set background cartoon style for the entire chain\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": %s}, \n",
+    "            { cartoon: {colorscheme:\"whiteCarbon\", opacity:0.7} }\n",
+    "        );\n",
+    "        \n",
+    "        // Highlight high-scoring residues with sticks\n",
+    "        let highScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": %s, \"resi\": highScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"red\", \"opacity\": 1}}\n",
+    "        );\n",
+    "\n",
+    "        // Highlight medium-scoring residues\n",
+    "        let midScoreResidues = [%s];\n",
+    "        viewer.getModel(0).setStyle(\n",
+    "            {\"chain\": %s, \"resi\": midScoreResidues}, \n",
+    "            {\"stick\": {\"color\": \"orange\", \"opacity\": 0.8}}\n",
+    "        );\n",
+    "        \"\"\" % (chain_js, \n",
+    "               \", \".join(str(resi) for resi in high_score_residues),\n",
+    "               chain_js,\n",
+    "               \", \".join(str(resi) for resi in mid_score_residues),\n",
+    "               chain_js)\n",
+    "    \n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = {pdb_js}; // JSON-encoded string literal produced in Python\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                viewer.addModel(pdb, \"pdb\");\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # html.escape covers &, <, >, and both quote characters, so the page survives\n",
+    "    # being carried inside the double-quoted srcdoc attribute\n",
+    "    srcdoc = html.escape(html_content, quote=True)\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{srcdoc}\"></iframe>'\n",
+    "\n",
+    "# Gradio UI: fetch and view a structure, then score one chain and show the results\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        file_input = gr.File(label=\"Or Upload PDB File\", file_types=['.pdb'], type=\"filepath\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
+    "        {\n",
+    "            \"model\": 0,\n",
+    "            \"style\": \"cartoon\",\n",
+    "            \"color\": \"whiteCarbon\",\n",
+    "            \"residue_range\": \"\",\n",
+    "            \"around\": 0,\n",
+    "            \"byres\": False,\n",
+    "        }\n",
+    "    ])\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "    def process_input(pdb_id, uploaded_file):\n",
+    "        \"\"\"\n",
+    "        Determine whether to use PDB ID or uploaded file\n",
+    "        \"\"\"\n",
+    "        if uploaded_file and uploaded_file.endswith('.pdb'):\n",
+    "            return uploaded_file\n",
+    "        return pdb_id\n",
+    "\n",
+    "    def predict(pdb_id, uploaded_file, segment):\n",
+    "        \"\"\"Run process_pdb on the uploaded file when there is one, else on the PDB ID.\"\"\"\n",
+    "        return process_pdb(process_input(pdb_id, uploaded_file), segment)\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
+    "    \n",
+    "    # Pass the components themselves as inputs so Gradio supplies their values at\n",
+    "    # click time; a gr.State built from `.value` would only ever hold the\n",
+    "    # components' initial values.\n",
+    "    prediction_btn.click(\n",
+    "        predict, \n",
+    "        inputs=[pdb_input, file_input, segment_input], \n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "    visualize_btn.click(\n",
+    "        fetch_pdb, \n",
+    "        inputs=[pdb_input], \n",
+    "        outputs=molecule_output2\n",
+    "    )\n",
+    "\n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "5b266025-7503-48f5-9371-3642d09f7e93",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7890\n",
+      "* Running on public URL: https://70a6e80d8deb42ddd0.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://70a6e80d8deb42ddd0.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import requests\n",
+    "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from typing import Optional, Tuple\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
+    "    \"\"\"\n",
+    "    Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
+    "    \"\"\"\n",
+    "    # Read the original PDB file\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', input_pdb)\n",
+    "    \n",
+    "    # Prepare a new structure with only the specified chain\n",
+    "    new_structure = structure.copy()\n",
+    "    for model in new_structure:\n",
+    "        # Remove all chains except the specified one\n",
+    "        chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
+    "        for chain in chains_to_remove:\n",
+    "            model.detach_child(chain.id)\n",
+    "    \n",
+    "    # Create a modified PDB with scores in B-factor\n",
+    "    scores_dict = {resi: score for resi, score in residue_scores}\n",
+    "    for model in new_structure:\n",
+    "        for chain in model:\n",
+    "            for residue in chain:\n",
+    "                if residue.id[1] in scores_dict:\n",
+    "                    for atom in residue:\n",
+    "                        atom.bfactor = scores_dict[residue.id[1]] #* 100  # Scale score to B-factor range\n",
+    "    \n",
+    "    # Save the modified structure\n",
+    "    output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(new_structure)\n",
+    "    io.save(output_pdb)\n",
+    "    \n",
+    "    return output_pdb\n",
+    "\n",
+    "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
+    "    \"\"\"\n",
+    "    Calculate the geometric center of high-scoring residues\n",
+    "    \"\"\"\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', pdb_path)\n",
+    "    \n",
+    "    # Collect coordinates of CA atoms from high-scoring residues\n",
+    "    coords = []\n",
+    "    for model in structure:\n",
+    "        for chain in model:\n",
+    "            if chain.id == chain_id:\n",
+    "                for residue in chain:\n",
+    "                    if residue.id[1] in high_score_residues:\n",
+    "                        if 'CA' in residue:  # Use alpha carbon as representative\n",
+    "                            ca_atom = residue['CA']\n",
+    "                            coords.append(ca_atom.coord)\n",
+    "    \n",
+    "    # Calculate geometric center\n",
+    "    if coords:\n",
+    "        center = np.mean(coords, axis=0)\n",
+    "        return center\n",
+    "    return None\n",
+    "\n",
+    "def process_pdb(pdb_id_or_file, segment):\n",
+    "    # Determine if input is a PDB ID or file path\n",
+    "    if pdb_id_or_file.endswith('.pdb'):\n",
+    "        pdb_path = pdb_id_or_file\n",
+    "        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
+    "    else:\n",
+    "        pdb_id = pdb_id_or_file\n",
+    "        pdb_path = fetch_pdb(pdb_id)\n",
+    "    \n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Identify high and mid scoring residues\n",
+    "    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "    mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "\n",
+    "    # Calculate geometric center of high-scoring residues\n",
+    "    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
+    "    pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
+    "    pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
+    "    result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
+    "    result_str += \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Write the prediction table to a text file\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    # Create chain-specific PDB with scores in B-factor\n",
+    "    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
+    "\n",
+    "    # Build the HTML 3D viewer for the structure, highlighting residues by score\n",
+    "    mol_vis = molecule(pdb_path, residue_scores, segment)\n",
+    "\n",
+    "    # Construct PyMOL command suggestions\n",
+    "    pymol_commands = f\"\"\"\n",
+    "PyMOL Visualization Commands:\n",
+    "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
+    "2. Select high-scoring residues: {pymol_selection}\n",
+    "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
+    "{pymol_center_cmd}\n",
+    "\"\"\"\n",
+    "    \n",
+    "    return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
+    "\n",
+    "# molecule() builds the embedded 3Dmol.js viewer HTML: a white cartoon for the selected chain,\n",
+    "# red sticks for residues scoring above 0.75, and orange sticks for scores in (0.5, 0.75]\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "\n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Filter residues based on their scores\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Load the original model and apply white cartoon style\n",
+    "        let chainModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        chainModel.setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            {\"cartoon\": {\"color\": \"white\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for high-scoring residues and apply red sticks style\n",
+    "        let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        highScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"red\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for medium-scoring residues and apply orange sticks style\n",
+    "        let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        midScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"orange\"}}\n",
+    "        );\n",
+    "        \"\"\" % (\n",
+    "            segment,\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in high_score_residues),\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in mid_score_residues)\n",
+    "        )\n",
+    "    \n",
+    "    # Generate the full HTML content\n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Return the HTML inside an iframe srcdoc attribute; only double and single quotes are entity-escaped\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
+    "        {\n",
+    "            \"model\": 0,\n",
+    "            \"style\": \"cartoon\",\n",
+    "            \"color\": \"whiteCarbon\",\n",
+    "            \"residue_range\": \"\",\n",
+    "            \"around\": 0,\n",
+    "            \"byres\": False,\n",
+    "        }\n",
+    "    ])\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
+    "    \n",
+    "    prediction_btn.click(\n",
+    "        process_pdb, \n",
+    "        inputs=[\n",
+    "            pdb_input, \n",
+    "            segment_input\n",
+    "        ], \n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "    visualize_btn.click(\n",
+    "        fetch_pdb, \n",
+    "        inputs=[pdb_input], \n",
+    "        outputs=molecule_output2\n",
+    "    )\n",
+    "\n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "514fad12-a31a-495f-af9e-04a18e11175e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL:  http://127.0.0.1:7897\n",
+      "* Running on public URL: https://0d9b5d36fa5302e0df.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://0d9b5d36fa5302e0df.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": []
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from datetime import datetime\n",
+    "import gradio as gr\n",
+    "import numpy as np\n",
+    "import requests\n",
+    "from Bio.PDB import PDBParser, MMCIFParser, PDBIO\n",
+    "from Bio.PDB.Polypeptide import is_aa\n",
+    "from Bio.SeqUtils import seq1\n",
+    "from gradio_molecule3d import Molecule3D\n",
+    "from typing import Optional, Tuple\n",
+    "\n",
+    "def normalize_scores(scores):\n",
+    "    min_score = np.min(scores)\n",
+    "    max_score = np.max(scores)\n",
+    "    return (scores - min_score) / (max_score - min_score) if max_score > min_score else scores\n",
+    "\n",
+    "def read_mol(pdb_path):\n",
+    "    \"\"\"Read PDB file and return its content as a string\"\"\"\n",
+    "    with open(pdb_path, 'r') as f:\n",
+    "        return f.read()\n",
+    "\n",
+    "def fetch_structure(pdb_id: str, output_dir: str = \".\") -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Fetch the structure file for a given PDB ID. Prioritizes CIF files.\n",
+    "    If a structure file already exists locally, it uses that.\n",
+    "    \"\"\"\n",
+    "    file_path = download_structure(pdb_id, output_dir)\n",
+    "    if file_path:\n",
+    "        return file_path\n",
+    "    else:\n",
+    "        return None\n",
+    "\n",
+    "def download_structure(pdb_id: str, output_dir: str) -> Optional[str]:\n",
+    "    \"\"\"\n",
+    "    Attempt to download the structure file in CIF or PDB format.\n",
+    "    Returns the path to the downloaded file, or None if download fails.\n",
+    "    \"\"\"\n",
+    "    for ext in ['.cif', '.pdb']:\n",
+    "        file_path = os.path.join(output_dir, f\"{pdb_id}{ext}\")\n",
+    "        if os.path.exists(file_path):\n",
+    "            return file_path\n",
+    "        url = f\"https://files.rcsb.org/download/{pdb_id}{ext}\"\n",
+    "        try:\n",
+    "            response = requests.get(url, timeout=10)\n",
+    "            if response.status_code == 200:\n",
+    "                with open(file_path, 'wb') as f:\n",
+    "                    f.write(response.content)\n",
+    "                return file_path\n",
+    "        except Exception as e:\n",
+    "            print(f\"Download error for {pdb_id}{ext}: {e}\")\n",
+    "    return None\n",
+    "\n",
+    "def convert_cif_to_pdb(cif_path: str, output_dir: str = \".\") -> str:\n",
+    "    \"\"\"\n",
+    "    Convert a CIF file to PDB format using BioPython and return the PDB file path.\n",
+    "    \"\"\"\n",
+    "    pdb_path = os.path.join(output_dir, os.path.basename(cif_path).replace('.cif', '.pdb'))\n",
+    "    parser = MMCIFParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', cif_path)\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(structure)\n",
+    "    io.save(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def fetch_pdb(pdb_id):\n",
+    "    pdb_path = fetch_structure(pdb_id)\n",
+    "    if not pdb_path:\n",
+    "        return None\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    if ext == '.cif':\n",
+    "        pdb_path = convert_cif_to_pdb(pdb_path)\n",
+    "    return pdb_path\n",
+    "\n",
+    "def create_chain_specific_pdb(input_pdb: str, chain_id: str, residue_scores: list) -> str:\n",
+    "    \"\"\"\n",
+    "    Create a PDB file with only the specified chain and replace B-factor with prediction scores\n",
+    "    \"\"\"\n",
+    "    # Read the original PDB file\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', input_pdb)\n",
+    "    \n",
+    "    # Prepare a new structure with only the specified chain\n",
+    "    new_structure = structure.copy()\n",
+    "    for model in new_structure:\n",
+    "        # Remove all chains except the specified one\n",
+    "        chains_to_remove = [chain for chain in model if chain.id != chain_id]\n",
+    "        for chain in chains_to_remove:\n",
+    "            model.detach_child(chain.id)\n",
+    "    \n",
+    "    # Create a modified PDB with scores in B-factor\n",
+    "    scores_dict = {resi: score for resi, score in residue_scores}\n",
+    "    for model in new_structure:\n",
+    "        for chain in model:\n",
+    "            for residue in chain:\n",
+    "                if residue.id[1] in scores_dict:\n",
+    "                    for atom in residue:\n",
+    "                        atom.bfactor = scores_dict[residue.id[1]] #* 100  # Scale score to B-factor range\n",
+    "    \n",
+    "    # Save the modified structure\n",
+    "    output_pdb = f\"{os.path.splitext(input_pdb)[0]}_{chain_id}_scored.pdb\"\n",
+    "    io = PDBIO()\n",
+    "    io.set_structure(new_structure)\n",
+    "    io.save(output_pdb)\n",
+    "    \n",
+    "    return output_pdb\n",
+    "\n",
+    "def calculate_geometric_center(pdb_path: str, high_score_residues: list, chain_id: str):\n",
+    "    \"\"\"\n",
+    "    Calculate the geometric center of high-scoring residues\n",
+    "    \"\"\"\n",
+    "    parser = PDBParser(QUIET=True)\n",
+    "    structure = parser.get_structure('protein', pdb_path)\n",
+    "    \n",
+    "    # Collect coordinates of CA atoms from high-scoring residues\n",
+    "    coords = []\n",
+    "    for model in structure:\n",
+    "        for chain in model:\n",
+    "            if chain.id == chain_id:\n",
+    "                for residue in chain:\n",
+    "                    if residue.id[1] in high_score_residues:\n",
+    "                        if 'CA' in residue:  # Use alpha carbon as representative\n",
+    "                            ca_atom = residue['CA']\n",
+    "                            coords.append(ca_atom.coord)\n",
+    "    \n",
+    "    # Calculate geometric center\n",
+    "    if coords:\n",
+    "        center = np.mean(coords, axis=0)\n",
+    "        return center\n",
+    "    return None\n",
+    "\n",
+    "def process_pdb(pdb_id_or_file, segment):\n",
+    "    # Determine if input is a PDB ID or file path\n",
+    "    if pdb_id_or_file.endswith('.pdb'):\n",
+    "        pdb_path = pdb_id_or_file\n",
+    "        pdb_id = os.path.splitext(os.path.basename(pdb_path))[0]\n",
+    "    else:\n",
+    "        pdb_id = pdb_id_or_file\n",
+    "        pdb_path = fetch_pdb(pdb_id)\n",
+    "    \n",
+    "    if not pdb_path:\n",
+    "        return \"Failed to fetch PDB file\", None, None\n",
+    "    \n",
+    "    # Determine the file format and choose the appropriate parser\n",
+    "    _, ext = os.path.splitext(pdb_path)\n",
+    "    parser = MMCIFParser(QUIET=True) if ext == '.cif' else PDBParser(QUIET=True)\n",
+    "    \n",
+    "    try:\n",
+    "        # Parse the structure file\n",
+    "        structure = parser.get_structure('protein', pdb_path)\n",
+    "    except Exception as e:\n",
+    "        return f\"Error parsing structure file: {e}\", None, None\n",
+    "    \n",
+    "    # Extract the specified chain\n",
+    "    try:\n",
+    "        chain = structure[0][segment]\n",
+    "    except KeyError:\n",
+    "        return \"Invalid Chain ID\", None, None\n",
+    "    \n",
+    "    protein_residues = [res for res in chain if is_aa(res)]\n",
+    "    sequence = \"\".join(seq1(res.resname) for res in protein_residues)\n",
+    "    sequence_id = [res.id[1] for res in protein_residues]\n",
+    "    \n",
+    "    # Generate random scores for residues\n",
+    "    scores = np.random.rand(len(sequence))\n",
+    "    normalized_scores = normalize_scores(scores)\n",
+    "    \n",
+    "    # Zip residues with scores to track the residue ID and score\n",
+    "    residue_scores = [(resi, score) for resi, score in zip(sequence_id, normalized_scores)]\n",
+    "\n",
+    "    # Identify high and mid scoring residues\n",
+    "    high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "    mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "\n",
+    "    # Calculate geometric center of high-scoring residues\n",
+    "    geo_center = calculate_geometric_center(pdb_path, high_score_residues, segment)\n",
+    "    pymol_selection = f\"select high_score_residues, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\"\n",
+    "    pymol_center_cmd = f\"show spheres, resi {'+'.join(map(str, high_score_residues))} and chain {segment}\" if geo_center is not None else \"\"\n",
+    "\n",
+    "    # Generate the result string\n",
+    "    current_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
+    "    result_str = f\"Prediction for PDB: {pdb_id}, Chain: {segment}\\nDate: {current_time}\\n\\n\"\n",
+    "    result_str += \"Columns: Residue Name, Residue Number, One-letter Code, Normalized Score\\n\\n\"\n",
+    "    result_str += \"\\n\".join([\n",
+    "        f\"{res.resname} {res.id[1]} {sequence[i]} {normalized_scores[i]:.2f}\" \n",
+    "        for i, res in enumerate(protein_residues)])\n",
+    "    \n",
+    "    # Write the prediction table to a text file\n",
+    "    prediction_file = f\"{pdb_id}_predictions.txt\"\n",
+    "    with open(prediction_file, \"w\") as f:\n",
+    "        f.write(result_str)\n",
+    "\n",
+    "    # Create chain-specific PDB with scores in B-factor\n",
+    "    scored_pdb = create_chain_specific_pdb(pdb_path, segment, residue_scores)\n",
+    "\n",
+    "    # Build the HTML 3D viewer for the structure, highlighting residues by score\n",
+    "    mol_vis = molecule(pdb_path, residue_scores, segment)\n",
+    "\n",
+    "    # Construct PyMOL command suggestions\n",
+    "    pymol_commands = f\"\"\"\n",
+    "PyMOL Visualization Commands:\n",
+    "1. Load PDB: load {os.path.abspath(pdb_path)}\n",
+    "2. Select high-scoring residues: {pymol_selection}\n",
+    "3. Highlight high-scoring residues: show sticks, high_score_residues\n",
+    "{pymol_center_cmd}\n",
+    "\"\"\"\n",
+    "    \n",
+    "    return result_str + \"\\n\\n\" + pymol_commands, mol_vis, [prediction_file, scored_pdb]\n",
+    "\n",
+    "def molecule(input_pdb, residue_scores=None, segment='A'):\n",
+    "    mol = read_mol(input_pdb)  # Read PDB file content\n",
+    "\n",
+    "    # Prepare high-scoring residues script if scores are provided\n",
+    "    high_score_script = \"\"\n",
+    "    if residue_scores is not None:\n",
+    "        # Filter residues based on their scores\n",
+    "        high_score_residues = [resi for resi, score in residue_scores if score > 0.75]\n",
+    "        mid_score_residues = [resi for resi, score in residue_scores if 0.5 < score <= 0.75]\n",
+    "        \n",
+    "        high_score_script = \"\"\"\n",
+    "        // Load the original model and apply white cartoon style\n",
+    "        let chainModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        chainModel.setStyle({}, {});\n",
+    "        chainModel.setStyle(\n",
+    "            {\"chain\": \"%s\"}, \n",
+    "            {\"cartoon\": {\"color\": \"white\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for high-scoring residues and apply red sticks style\n",
+    "        let highScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        highScoreModel.setStyle({}, {});\n",
+    "        highScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"red\"}}\n",
+    "        );\n",
+    "\n",
+    "        // Create a new model for medium-scoring residues and apply orange sticks style\n",
+    "        let midScoreModel = viewer.addModel(pdb, \"pdb\");\n",
+    "        midScoreModel.setStyle({}, {});\n",
+    "        midScoreModel.setStyle(\n",
+    "            {\"chain\": \"%s\", \"resi\": [%s]}, \n",
+    "            {\"stick\": {\"color\": \"orange\"}}\n",
+    "        );\n",
+    "        \"\"\" % (\n",
+    "            segment,\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in high_score_residues),\n",
+    "            segment,\n",
+    "            \", \".join(str(resi) for resi in mid_score_residues)\n",
+    "        )\n",
+    "    \n",
+    "    # Generate the full HTML content\n",
+    "    html_content = f\"\"\"\n",
+    "    <!DOCTYPE html>\n",
+    "    <html>\n",
+    "    <head>    \n",
+    "        <meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\" />\n",
+    "        <style>\n",
+    "        .mol-container {{\n",
+    "            width: 100%;\n",
+    "            height: 700px;\n",
+    "            position: relative;\n",
+    "        }}\n",
+    "        </style>\n",
+    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.6.3/jquery.min.js\"></script>\n",
+    "        <script src=\"https://3Dmol.csb.pitt.edu/build/3Dmol-min.js\"></script>\n",
+    "    </head>\n",
+    "    <body>\n",
+    "        <div id=\"container\" class=\"mol-container\"></div>\n",
+    "        <script>\n",
+    "            let pdb = `{mol}`; // Use template literal to properly escape PDB content\n",
+    "            $(document).ready(function () {{\n",
+    "                let element = $(\"#container\");\n",
+    "                let config = {{ backgroundColor: \"white\" }};\n",
+    "                let viewer = $3Dmol.createViewer(element, config);\n",
+    "                \n",
+    "                {high_score_script}\n",
+    "                \n",
+    "                // Add hover functionality\n",
+    "                viewer.setHoverable(\n",
+    "                    {{}}, \n",
+    "                    true, \n",
+    "                    function(atom, viewer, event, container) {{\n",
+    "                        if (!atom.label) {{\n",
+    "                            atom.label = viewer.addLabel(\n",
+    "                                atom.resn + \":\" +atom.resi + \":\" + atom.atom, \n",
+    "                                {{\n",
+    "                                    position: atom, \n",
+    "                                    backgroundColor: 'mintcream', \n",
+    "                                    fontColor: 'black',\n",
+    "                                    fontSize: 12,\n",
+    "                                    padding: 2\n",
+    "                                }}\n",
+    "                            );\n",
+    "                        }}\n",
+    "                    }},\n",
+    "                    function(atom, viewer) {{\n",
+    "                        if (atom.label) {{\n",
+    "                            viewer.removeLabel(atom.label);\n",
+    "                            delete atom.label;\n",
+    "                        }}\n",
+    "                    }}\n",
+    "                );\n",
+    "                \n",
+    "                viewer.zoomTo();\n",
+    "                viewer.render();\n",
+    "                viewer.zoom(0.8, 2000);\n",
+    "            }});\n",
+    "        </script>\n",
+    "    </body>\n",
+    "    </html>\n",
+    "    \"\"\"\n",
+    "    \n",
+    "    # Return the HTML wrapped in an iframe srcdoc attribute; only double and single quotes are replaced with HTML entities\n",
+    "    return f'<iframe width=\"100%\" height=\"700\" srcdoc=\"{html_content.replace(chr(34), \"&quot;\").replace(chr(39), \"&#39;\")}\"></iframe>'\n",
+    "\n",
+    "\n",
+    "# Gradio UI\n",
+    "with gr.Blocks() as demo:\n",
+    "    gr.Markdown(\"# Protein Binding Site Prediction\")\n",
+    "    \n",
+    "    with gr.Row():\n",
+    "        pdb_input = gr.Textbox(value=\"4BDU\", label=\"PDB ID\", placeholder=\"Enter PDB ID here...\")\n",
+    "        visualize_btn = gr.Button(\"Visualize Structure\")\n",
+    "\n",
+    "    molecule_output2 = Molecule3D(label=\"Protein Structure\", reps=[\n",
+    "        {\n",
+    "            \"model\": 0,\n",
+    "            \"style\": \"cartoon\",\n",
+    "            \"color\": \"whiteCarbon\",\n",
+    "            \"residue_range\": \"\",\n",
+    "            \"around\": 0,\n",
+    "            \"byres\": False,\n",
+    "        }\n",
+    "    ])\n",
+    "\n",
+    "    with gr.Row():\n",
+    "        segment_input = gr.Textbox(value=\"A\", label=\"Chain ID\", placeholder=\"Enter Chain ID here...\")\n",
+    "        prediction_btn = gr.Button(\"Predict Binding Site\")\n",
+    "\n",
+    "\n",
+    "    molecule_output = gr.HTML(label=\"Protein Structure\")\n",
+    "    predictions_output = gr.Textbox(label=\"Binding Site Predictions\")\n",
+    "    download_output = gr.File(label=\"Download Files\", file_count=\"multiple\")\n",
+    "    \n",
+    "    prediction_btn.click(\n",
+    "        process_pdb, \n",
+    "        inputs=[\n",
+    "            pdb_input, \n",
+    "            segment_input\n",
+    "        ], \n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "    visualize_btn.click(\n",
+    "        fetch_pdb, \n",
+    "        inputs=[pdb_input], \n",
+    "        outputs=molecule_output2\n",
+    "    )\n",
+    "\n",
+    "    gr.Markdown(\"## Examples\")\n",
+    "    gr.Examples(\n",
+    "        examples=[\n",
+    "            [\"7RPZ\", \"A\"],\n",
+    "            [\"2IWI\", \"B\"],\n",
+    "            [\"2F6V\", \"A\"]\n",
+    "        ],\n",
+    "        inputs=[pdb_input, segment_input],\n",
+    "        outputs=[predictions_output, molecule_output, download_output]\n",
+    "    )\n",
+    "\n",
+    "demo.launch(share=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f960cc2-8330-40f1-b54d-693ce922fa74",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cec41eef-c414-440f-a0ea-63fc8d3acf0b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (LLM)",
+   "language": "python",
+   "name": "llm"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}