Spaces:

simonduerr
/

metal3d

Running

App Files Files Community

Simon Duerr committed on Jun 2, 2022

Commit

b4346be

•

1 Parent(s): 3897ec7

add files

Browse files

Files changed (9) hide show

app.py +245 -11
utils/__pycache__/helpers.cpython-38.pyc +0 -0
utils/__pycache__/model.cpython-38.pyc +0 -0
utils/__pycache__/voxelization.cpython-38.pyc +0 -0
utils/helpers.py +272 -0
utils/model.py +42 -0
utils/voxelization.py +209 -0
weights/Metal3D.pth +3 -0
weights/metal_0.5A_v3_d0.2_16Abox.pth +3 -0

app.py CHANGED Viewed

@@ -1,19 +1,253 @@
 import gradio as gr
-def update(name):
-    return f"Welcome to Gradio, {name}!"
-demo = gr.Blocks()
-with demo:
     gr.Markdown("# Metal3D")
     with gr.Group():
-        inp = gr.Textbox(placeholder="2CBA", label="PDB or Uniprot code")
-        file  = gr.File(file_count=1, label="Upload a PDB file")
-        btn = gr.Button("Run Metal3D")
-    out = gr.Textbox()
     mol = gr.HTML()
-    btn.click(fn=update, inputs=inp, outputs=out)
-demo.launch()

 import gradio as gr
+import urllib
+import re
+import sys
+import warnings
+import torch
+import torch.nn as nn
+import ipywidgets as widgets
+from ipywidgets import interact, fixed
+from utils.helpers import *
+from utils.voxelization import processStructures
+from utils.model import Model
+import numpy as np
+import os
+def update(inp, file, mode):
+    try:
+        pdb_file = file.name
+    except:
+        print("using pdbfile")
+    try:
+        pdb_file = inp
+        if (
+            re.match(
+                "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}",
+                pdb_file,
+            ).group()
+            == pdb_file
+        ):
+            urllib.request.urlretrieve(
+                f"https://alphafold.ebi.ac.uk/files/AF-{pdb_file}-F1-model_v2.pdb",
+                f"files/{pdb_file}.pdb",
+            )
+    except AttributeError:
+        if len(inp) == 4:
+            pdb_file = inp
+            urllib.request.urlretrieve(
+                f"http://files.rcsb.org/download/{pdb_file.lower()}.pdb1",
+                f"files/{pdb_file}.pdb",
+            )
+        else:
+            return "pdb code must be 4 letters or Uniprot code does not match", ""
+    if mode == "All residues":
+        ids = get_all_protein_resids(
+            f"files/{pdb_file}.pdb",
+        )
+    else:
+        ids = get_all_metalbinding_resids(f"files/{pdb_file}.pdb")
+    voxels, prot_centers, prot_N, prots = processStructures(pdb_file, ids)
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    voxels.to(device)
+    print(voxels.shape)
+    model = Model()
+    model.to(device)
+    model.load_state_dict(torch.load("weights/metal_0.5A_v3_d0.2_16Abox.pth"))
+    model.eval()
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore")
+        output = model(voxels)
+    print(output.shape)
+    prot_v = np.vstack(prot_centers)
+    output_v = output.flatten().cpu().detach().numpy()
+    bb = get_bb(prot_v)
+    gridres = 0.5
+    grid, box_N = create_grid_fromBB(bb, voxelSize=gridres)
+    probability_values = get_probability_mean(grid, prot_v, output_v)
+    print(probability_values.shape)
+    write_cubefile(
+        bb,
+        probability_values,
+        box_N,
+        outname=f"output/metal_{pdb_file}.cube",
+        gridres=gridres,
+    )
+    message = find_unique_sites(
+        probability_values,
+        grid,
+        writeprobes=True,
+        probefile=f"output/probes_{pdb_file}.pdb",
+        threshold=7,
+        p=0.15,
+    )
+    return message, molecule(
+        f"files/{pdb_file}.pdb",
+        f"output/probes_{pdb_file}.pdb",
+        f"output/metal_{pdb_file}.cube",
+    )
+def test():
+    x = """<!DOCTYPE html>
+        <html>
+        <head>
+    <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+    </head>
+    <body>
+    <script src="https://3Dmol.org/build/3Dmol-min.js" async></script> <div style="height: 400px; width: 400px; position: relative;" class="viewer_3Dmoljs" data-pdb="2POR" data-backgroundcolor="0xffffff" data-style="stick" ></div>
+        </body></html>"""
+    return f"""<iframe style="width: 100%; height: 480px" name="result" allow="midi; geolocation; microphone; camera;
+    display-capture; encrypted-media;" sandbox="allow-modals allow-forms
+    allow-scripts allow-same-origin allow-popups
+    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
+    allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
+def read_mol(molpath):
+    with open(molpath, "r") as fp:
+        lines = fp.readlines()
+    mol = ""
+    for l in lines:
+        mol += l
+    return mol
+def molecule(pdb, probes, cube):
+    mol = read_mol(pdb)
+    probes = read_mol(probes)
+    cubefile = read_mol(cube)
+    x = (
+        """<!DOCTYPE html>
+        <html>
+        <head>
+    <meta http-equiv="content-type" content="text/html; charset=UTF-8" />
+    <style>
+    body{
+        font-family:sans-serif
+    }
+.mol-container {
+  width: 100%;
+  height: 400px;
+  position: relative;
+}
+.slider{
+    width:80%;
+    margin:0 auto
+}
+.slidercontainer{
+    display:flex;
+}
+.slidercontainer > * + * {
+    margin-left: 0.5rem;
+}
+#isovalue{
+ text-align:right}
+</style>
+<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/rangeslider.js/2.3.3/rangeslider.min.js" integrity="sha512-BUlWdwDeJo24GIubM+z40xcj/pjw7RuULBkxOTc+0L9BaGwZPwiwtbiSVzv31qR7TWx7bs6OPTE5IyfLOorboQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
+    </head>
+    <body>
+    <div class="slidercontainer">
+    <span>Isovalue </span>
+    <span id="isovalue">0.5</span>
+    <input class="slider" type="range" id="rangeslider" min="0" max="1" step="0.025" value=0.5>
+    </div>
+    <div id="container" class="mol-container"></div>
+            <script>
+            let viewer = null;
+            let voldata = null;
+            $(document).ready(function () {
+                let element = $("#container");
+                let config = { backgroundColor: "white" };
+                viewer = $3Dmol.createViewer( element, config );
+                viewer.ui.initiateUI();
+                let data = `"""
+        + mol
+        + """`
+                viewer.addModel( data, "pdb" );
+                let cubefile = `"""
+        + cubefile
+        + """`
+                voldata = new $3Dmol.VolumeData(cubefile, "cube");
+                viewer.addIsosurface(voldata, { isoval: 0.7 , color: "blue", alpha: 0.85, smoothness: 1 });
+                viewer.getModel(0).setStyle({}, {cartoon: {color: "grayCarbon"}});
+                let probes =`"""
+        + probes
+        + """`
+                viewer.addModel(probes, "pdb");
+                viewer.getModel(1).setStyle({ "resn": "ZN" }, { "sphere": { }});
+                viewer.getModel(1).setHoverable({}, true,
+                    function (atom, viewer, event, container) {
+                        if (!atom.label) {
+                            atom.label = viewer.addLabel("ZN p=" + atom.pdbline.substring(55, 60), { position: atom, backgroundColor: "mintcream", fontColor: "black" });
+                        }
+                    },
+                    function (atom, viewer) {
+                        if (atom.label) {
+                            viewer.removeLabel(atom.label);
+                            delete atom.label;
+                        }
+                    }
+                );
+                viewer.zoomTo();
+                viewer.render();
+                viewer.zoom(0.8, 2000);
+        });
+        </script>
+         <script>
+         $("#rangeslider").rangeslider().on("change", function (el) {
+                isoval = parseFloat(el.target.value);
+                $("#isovalue").text(el.target.value)
+                viewer.addIsosurface(voldata, { isoval: parseFloat(el.target.value), color: "blue", alpha: 0.85, smoothness: 1 });
+                viewer.render();
+            });
+            </script>
+        </body></html>"""
+    )
+    return f"""<iframe style="width: 100%; height: 480px" name="result" allow="midi; geolocation; microphone; camera;
+    display-capture; encrypted-media;" sandbox="allow-modals allow-forms
+    allow-scripts allow-same-origin allow-popups
+    allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
+    allowpaymentrequest="" frameborder="0" srcdoc='{x}'></iframe>"""
+# Build the Gradio interface: input widgets, a run button and two outputs.
+metal3d = gr.Blocks()
+with metal3d:
     gr.Markdown("# Metal3D")
+    gr.Markdown(
+        """
+        Details about implementation and code available here:
+        >Duerr, Levy and Roethlisberger, Predicting zinc ion location using deep learning, BioRxiv, 2022 "
+    """
+    )
     with gr.Group():
+        # Text input holding a PDB code or UniProt identifier
+        inp = gr.Textbox(
+            placeholder="PDB Code or Uniprot identifier", label="Input molecule"
+        )
+        gr.Markdown("or upload a file")
+        # Alternative input: a single uploaded file
+        file = gr.File(file_count="single", type="file")
+        # update() compares this value against "All residues"
+        mode = gr.Radio(
+            ["All metalbinding residues (ASP, CYS, GLU, HIS)", "All residues"],
+            label="Residues to use for prediction",
+        )
+        btn = gr.Button("Run")
+    gr.Markdown("# Output")
+    out = gr.Textbox(label="status")
     mol = gr.HTML()
+    # Clicking "Run" calls update(inp, file, mode); its two return values fill
+    # the status textbox and the HTML component.
+    btn.click(fn=update, inputs=[inp, file, mode], outputs=[out, mol])
+# Launched at import time (there is no __main__ guard).
+metal3d.launch()

utils/__pycache__/helpers.cpython-38.pyc ADDED Viewed

Binary file (7.65 kB). View file

utils/__pycache__/model.cpython-38.pyc ADDED Viewed

Binary file (1.41 kB). View file

utils/__pycache__/voxelization.cpython-38.pyc ADDED Viewed

Binary file (4.99 kB). View file

utils/helpers.py ADDED Viewed

	@@ -0,0 +1,272 @@

+import os
+import multiprocessing
+from multiprocessing import Pool
+from turtle import width
+import numpy as np
+from moleculekit.molecule import Molecule
+from scipy.spatial import KDTree
+from sklearn.cluster import AgglomerativeClustering
+def create_grid_fromBB(boundingBox, voxelSize=1):
+    """Create a grid from a bounding box.
+    Parameters
+    ----------
+    boundingBox : list
+        List of the form [xmin, xmax, ymin, ymax, zmin, zmax]
+    voxelSize : float
+        Size of the voxels in Angstrom
+    Returns
+    -------
+    grid : numpy.ndarray
+        Grid of shape (nx, ny, nz)
+    box_N : numpy.ndarray
+        Number of voxels in each dimension
+    """
+    # increase grid by 0.5 to sample everything
+    xrange = np.arange(boundingBox[0][0], boundingBox[1][0] + 0.5, step=voxelSize)
+    yrange = np.arange(boundingBox[0][1], boundingBox[1][1] + 0.5, step=voxelSize)
+    zrange = np.arange(boundingBox[0][2], boundingBox[1][2] + 0.5, step=voxelSize)
+    gridpoints = np.zeros((xrange.shape[0] * yrange.shape[0] * zrange.shape[0], 3))
+    i = 0
+    for x in xrange:
+        for y in yrange:
+            for z in zrange:
+                gridpoints[i][0] = x
+                gridpoints[i][1] = y
+                gridpoints[i][2] = z
+                i += 1
+    return gridpoints, (xrange.shape[0], yrange.shape[0], zrange.shape[0])
+def get_bb(points):
+    """Return bounding box from a set of points (N,3)
+    Parameters
+    ----------
+    points : numpy.ndarray
+        Set of points (N,3)
+    Returns
+    -------
+    boundingBox : list
+        List of the form [xmin, xmax, ymin, ymax, zmin, zmax]
+    """
+    minx = np.min(points[:, 0])
+    maxx = np.max(points[:, 0])
+    miny = np.min(points[:, 1])
+    maxy = np.max(points[:, 1])
+    minz = np.min(points[:, 2])
+    maxz = np.max(points[:, 2])
+    bb = [[minx, miny, minz], [maxx, maxy, maxz]]
+    return bb
+def get_all_protein_resids(pdb_file):
+    """Return all protein residues from a pdb file
+    Parameters
+    ----------
+    pdb_file : str
+        Path to pdb file
+    Returns
+    -------
+    resids : numpy.ndarray
+        Array of protein resids old -> new
+    """
+    try:
+        prot = Molecule(pdb_file)
+    except:
+        exit("could not read file")
+    prot.filter("protein")
+    return prot.get("index", sel="name CA")
+def get_all_metalbinding_resids(pdb_file):
+    """Return all metal binding residues from a pdb file
+    Parameters
+    ----------
+    pdb_file : str
+        Path to pdb file
+    Returns
+    -------
+    resids : numpy.ndarray
+        id of resids that are metal binding
+    """
+    try:
+        prot = Molecule(pdb_file)
+    except:
+        exit("could not read file")
+    prot.filter("protein")
+    return prot.get(
+        "index",
+        sel="name CA and resname HIS HID HIE HIP CYS CYX GLU GLH GLN ASP ASH ASN GLN MET",
+    )
+def compute_average_p_fast(point, cutoff=1):
+    """Using KDTree find the closest gridpoints
+    Parameters
+    ----------
+    point : numpy.ndarray
+        Point of shape (3,)
+    cutoff : float
+        Cutoff distance in Angstrom
+    Returns
+    -------
+    average_p : numpy.ndarray
+        Average probability of shape (1,)"""
+    p = 0
+    nearest_neighbors, indices = tree.query(
+        point, k=15, distance_upper_bound=cutoff, workers=1
+    )
+    if np.min(nearest_neighbors) != np.inf:
+        p = np.mean(output_v[indices[nearest_neighbors != np.inf]])
+    return p
+def get_probability_mean(grid, prot_centers, pvalues):
+    """Compute the mean probability of all gridpoints from the globalgrid based on the individual boxes
+    Parameters
+    ----------
+    grid : numpy.ndarray
+        Grid of shape (nx, ny, nz)
+    prot_centers : numpy.ndarray
+        Protein centers of shape (N,3)
+    pvalues : numpy.ndarray
+        Probability values of shape (N,1)
+    Returns
+    -------
+    mean_p : numpy.ndarray
+        Mean probability over grid of shape (nx, ny, nz)
+    """
+    global output_v
+    output_v = pvalues
+    global prot_v
+    prot_v = prot_centers
+    cpuCount = multiprocessing.cpu_count()
+    global tree
+    tree = KDTree(prot_v)
+    p = Pool(cpuCount)
+    results = p.map(compute_average_p_fast, grid)
+    return np.array(results)
+def write_cubefile(bb, pvalues, box_N, outname="Metal3D_pmap.cube", gridres=1):
+    """Write a cube file from a probability map
+    The cube specification from gaussian is used, distance are converted to bohr
+    Parameters
+    ----------
+    bb : list
+        List of the form [xmin, xmax, ymin, ymax, zmin, zmax]
+    pvalues : numpy.ndarray
+        Probability values of shape (nx, ny, nz)
+    box_N : tuple
+        Number of voxels in each dimension
+    outname : str
+        Name of the output file
+    gridres:float
+        Resolution of the grid used for writing the voxels
+    """
+    with open(outname, "w") as cube:
+        cube.write(" Metal3D Cube File\n")
+        cube.write(" Outer Loop: X, Middle Loop y, inner Loop z\n")
+        angstromToBohr = 1.89
+        cube.write(
+            f"    1   {bb[0][0]*angstromToBohr: .6f}  {bb[0][1]*angstromToBohr: .6f}   {bb[0][2]*angstromToBohr: .6f}\n"
+        )
+        cube.write(
+            f"{str(box_N[0]).rjust(5)}    {1.890000*gridres:.9f}    0.000000    0.000000\n"
+        )
+        cube.write(
+            f"{str(box_N[1]).rjust(5)}    0.000000    {1.890000*gridres:.9f}    0.000000\n"
+        )
+        cube.write(
+            f"{str(box_N[2]).rjust(5)}    0.000000    0.000000    {1.890000*gridres:.9f}\n"
+        )
+        cube.write("    1    1.000000    0.000000    0.000000    0.000000\n")
+        o = pvalues.reshape(box_N)
+        for x in range(box_N[0]):
+            for y in range(box_N[1]):
+                for z in range(box_N[2]):
+                    cube.write(f" {o[x][y][z]: .5E}")
+                    if z % 6 == 5:
+                        cube.write("\n")
+                cube.write("\n")
+def find_unique_sites(
+    pvalues, grid, writeprobes=False, probefile="probes.pdb", threshold=5, p=0.75
+):
+    """The probability voxels are points and the voxel clouds may contain multiple metals
+    This function finds the unique sites and returns the coordinates of the unique sites with the highest p for each cluster.
+    It uses the AgglomerativeClustering algorithm to find the unique sites.
+    The threshold is the maximum distance between two points in the same cluster it can be changed to get more metal points.
+    Parameters
+    ----------
+    pvalues : numpy.ndarray
+        Probability values of shape (N, 1)
+    grid : numpy.ndarray
+        Grid of shape (N, 3)
+    writeprobes : bool
+        If True, write the probes to a pdb file
+    probefile : str
+        Name of the output file
+    threshold : float
+        Maximum distance between two points in the same cluster
+    p : float
+        Minimum probability of a point to be considered a unique site
+    """
+    points = grid[pvalues > p]
+    point_p = pvalues[pvalues > p]
+    if len(points) == 0:
+        return "no metals found"
+    clustering = AgglomerativeClustering(
+        n_clusters=None, linkage="complete", distance_threshold=threshold
+    ).fit(points)
+    message = f"min metal p={p}, n(metals) found: {clustering.n_clusters_}"
+    sites = []
+    for i in range(clustering.n_clusters_):
+        c_points = points[clustering.labels_ == i]
+        c_points_p = point_p[clustering.labels_ == i]
+        position = c_points[np.argmax(c_points_p)]
+        sites.append((position, np.max(c_points_p)))
+    if writeprobes:
+        print(f"writing probes to {probefile}")
+        with open(probefile, "w") as f:
+            for i, site in enumerate(sites):
+                f.write(
+                    f"HETATM  {i+1:3} ZN    ZN A {i+1:3}    {site[0][0]: 8.3f}{site[0][1]: 8.3f}{site[0][2]: 8.3f}  {site[1]:.2f}  0.0           ZN2+\n"
+                )
+    return message

utils/model.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import warnings
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class Model(nn.Module):
+    """Model with same padding
+    Conv5 uses a large filter size to aggregate the features from the whole box"""
+    def __init__(self):
+        super(Model, self).__init__()
+        self.conv1 = nn.Conv3d(8, 32, 3, padding="same")
+        self.conv2 = nn.Conv3d(32, 64, 3, padding="same")
+        self.conv3 = nn.Conv3d(64, 80, 3, padding="same")
+        self.conv4 = nn.Conv3d(80, 20, 3, padding="same")
+        self.conv5 = nn.Conv3d(20, 20, 20, padding="same")
+        self.conv6 = nn.Conv3d(20, 16, 3, padding="same")
+        self.conv7 = nn.Conv3d(16, 1, 3, padding="same")
+        self.dropout1 = nn.Dropout(0.2)
+    def forward(self, x):
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.conv2(x)
+        x = F.relu(x)
+        x = self.conv3(x)
+        x = F.relu(x)
+        x = self.conv4(x)
+        x = F.relu(x)
+        x = self.conv5(x)
+        x = F.relu(x)
+        x = self.dropout1(x)
+        x = self.conv6(x)
+        x = F.relu(x)
+        x = self.conv7(x)
+        x = torch.sigmoid(x)
+        return x

utils/voxelization.py ADDED Viewed

	@@ -0,0 +1,209 @@

+import time
+import multiprocessing
+from multiprocessing import Pool
+import torch
+import numpy as np
+from moleculekit.molecule import Molecule
+from moleculekit.tools.voxeldescriptors import getVoxelDescriptors
+from moleculekit.tools.atomtyper import prepareProteinForAtomtyping
+from moleculekit.tools.preparation import systemPrepare
+class AtomtypingError(Exception):
+    pass
+class StructureCleaningError(Exception):
+    pass
+class ProteinPrepareError(Exception):
+    pass
+class VoxelizationError(Exception):
+    pass
+metal_atypes = (
+    "MG",
+    "ZN",
+    "MN",
+    "CA",
+    "FE",
+    "HG",
+    "CD",
+    "NI",
+    "CO",
+    "CU",
+    "K",
+    "LI",
+    "Mg",
+    "Zn",
+    "Mn",
+    "Ca",
+    "Fe",
+    "Hg",
+    "Cd",
+    "Ni",
+    "Co",
+    "Cu",
+    "Li",
+)
+def voxelize_single_notcentered(env):
+    """voxelize 1 structure, executed on a single CPU
+    Using 7 of the 8 channels supplied by moleculekit(excluding metals)
+    Additionally it uses all the metalbinding residues as channel
+    Parameters
+    ----------
+    env : tuple
+        Tuple of the form (prot, idx)
+    Returns
+    -------
+    voxels : torch.tensor
+        Voxelized structure with 8 channels (8,20,20,20)
+    prot_centers : list
+        List of the centers of the voxels (20x20x20,3)
+    prot_n : list
+        List of the number of voxels in each voxel (20x20x20)
+    prot : moleculekit.Molecule
+        Moleculekit molecule
+    """
+    prot, id = env
+    c = prot.get("coords", sel=f"index {id} and name CA")
+    size = [16, 16, 16]  # size of box
+    voxels = torch.zeros(8, 32, 32, 32)
+    try:
+        hydrophobic = prot.atomselect("element C")
+        hydrophobic = hydrophobic.reshape(hydrophobic.shape[0], 1)
+        aromatic = prot.atomselect(
+            "resname HIS HIE HIP HID TRP TYR PHE and sidechain and not name CB and not hydrogen"
+        )
+        aromatic = aromatic.reshape(aromatic.shape[0], 1)
+        metalcoordination = prot.atomselect(
+            "(name ND1 NE2 SG OE1 OE2 OD2) or (protein and name O N)"
+        )
+        metalcoordination = metalcoordination.reshape(metalcoordination.shape[0], 1)
+        hbondacceptor = prot.atomselect(
+            "(resname ASP GLU HIS HIE HIP HID SER THR MSE CYS MET and name ND2 NE2 OE1 OE2 OD1 OD2 OG OG1 SE SG) or name O"
+        )
+        hbondacceptor = hbondacceptor.reshape(metalcoordination.shape[0], 1)
+        hbonddonor = prot.atomselect(
+            "(resname ASN GLN ASH GLH TRP MSE SER THR MET CYS and name ND2 NE2 NE1 SG SE OG OG1) or name N"
+        )
+        hbonddonor = hbonddonor.reshape(metalcoordination.shape[0], 1)
+        positive = prot.atomselect(
+            "resname LYS ARG HIS HIE HIP HID and name NZ NH1 NH2 ND1 NE2 NE"
+        )
+        positive = positive.reshape(positive.shape[0], 1)
+        negative = prot.atomselect("(resname ASP GLU ASH GLH and name OD1 OD2 OE1 OE2)")
+        negative = negative.reshape(negative.shape[0], 1)
+        occupancy = prot.atomselect("protein and not hydrogen")
+        occupancy = occupancy.reshape(occupancy.shape[0], 1)
+        userchannels = np.hstack(
+            [
+                hydrophobic,
+                aromatic,
+                metalcoordination,
+                hbondacceptor,
+                hbonddonor,
+                positive,
+                negative,
+                occupancy,
+            ]
+        )
+        prot_vox, prot_centers, prot_N = getVoxelDescriptors(
+            prot,
+            center=c,
+            userchannels=userchannels,
+            boxsize=size,
+            voxelsize=0.5,
+            validitychecks=False,
+        )
+    except:
+        raise VoxelizationError(f"voxelization of {id} failed")
+    nchannels = prot_vox.shape[1]
+    prot_vox_t = (
+        prot_vox.transpose()
+        .reshape([1, nchannels, prot_N[0], prot_N[1], prot_N[2]])
+        .copy()
+    )
+    voxels = torch.from_numpy(prot_vox_t)
+    return (voxels, prot_centers, prot_N, prot.copy())
+def processStructures(pdb_file, resids, clean=True):
+    """Process a pdb file and return a list of voxelized boxes centered on the residues
+    Parameters
+    ----------
+    pdb_file : str
+        Path to pdb file
+    resids : list
+        List of resids to center the voxels on
+    clean : bool
+        If True, remove all non-protein residues from the pdb file
+    Returns
+    -------
+    voxels : torch.Tensor
+        Voxelized boxes with 8 channels (N, 8,32,32,32)
+    prot_centers_list : list
+        List of the centers of the voxels (N*32**32*32,3)
+    prot_n_list : list
+        List of the number of voxels in each box (N,3)
+    envs: list
+        List of tuples (prot, idx) (N)
+    """
+    start_time_processing = time.time()
+    # load molecule using MoleculeKit
+    try:
+        prot = Molecule(pdb_file)
+    except:
+        raise IOError("could not read pdbfile")
+    if clean:
+        prot.filter("protein and not hydrogen")
+    environments = []
+    for idx in resids:
+        try:
+            environments.append((prot.copy(), idx))
+        except:
+            print("ignoring " + idx)
+    prot_centers_list = []
+    prot_n_list = []
+    envs = []
+    results = [voxelize_single_notcentered(x) for x in environments]
+    voxels = torch.empty(len(results), 8, 32, 32, 32, device="cuda")
+    vox_env, prot_centers_list, prot_n_list, envs = zip(*results)
+    for i, vox_env in enumerate(vox_env):
+        voxels[i] = vox_env
+    print(f"Voxelization took  {time.time() - start_time_processing:.3f} seconds ")
+    return voxels, prot_centers_list, prot_n_list, envs

weights/Metal3D.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f23dc5c28ffb03a77756f7e2613cb6f4b92425c1db87b422295ddaab1204515
+size 7872827

weights/metal_0.5A_v3_d0.2_16Abox.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5a1b0c5ea6c5dcdedfae4e24b8461da107ea78c8b96c7db8f44db532c87246f
+size 13815931