Spaces:

igashov
/

DiffLinker

Sleeping

App Files Community

igashov committed on Apr 12, 2023

Commit

abdd514

1 Parent(s): cd2152f

Variable number of samples

Browse files

Files changed (3) hide show

app.py +49 -38
output.py +2 -2
src/generation.py +4 -6

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from src import const
 from src.datasets import get_dataloader, collate_with_fragment_edges, parse_molecule, MOADDataset
 from src.lightning import DDPM
 from src.linker_size_lightning import SizeClassifier
-from src.generation import N_SAMPLES, generate_linkers, try_to_convert_to_sdf, get_pocket
 from zipfile import ZipFile
@@ -125,7 +125,7 @@ def show_input(in_fragments, in_protein):
         vis = show_target(in_protein)
     elif in_fragments is not None and in_protein is not None:
         vis = show_fragments_and_target(in_fragments, in_protein)
-    return [vis, gr.Radio.update(visible=False), None]
 def show_fragments(in_fragments):
@@ -167,28 +167,25 @@ def clear_fragments_input(in_protein):
     vis = ''
     if in_protein is not None:
         vis = show_target(in_protein)
-    return [None, vis, gr.Radio.update(visible=False), None]
 def clear_protein_input(in_fragments):
     vis = ''
     if in_fragments is not None:
         vis = show_fragments(in_fragments)
-    return [None, vis, gr.Radio.update(visible=False), None]
 def click_on_example(example):
     fragment_fname, target_fname = example
     fragment_path = f'examples/{fragment_fname}' if fragment_fname != '' else None
     target_path = f'examples/{target_fname}' if target_fname != '' else None
-    return [fragment_path, target_path, 50, 0] + show_input(fragment_path, target_path)
-def draw_sample(idx, out_files):
-    with_protein = (len(out_files) == N_SAMPLES + 3)
-    if isinstance(idx, str):
-        idx = int(idx.strip().split(' ')[-1]) - 1
     in_file = out_files[1]
     in_sdf = in_file if isinstance(in_file, str) else in_file.name
@@ -204,8 +201,7 @@ def draw_sample(idx, out_files):
         input_target_content = read_molecule_content(in_pdb)
         target_fmt = in_pdb.split('.')[-1]
-    out_file = out_files[idx + offset]
-    out_sdf = out_file if isinstance(out_file, str) else out_file.name
     generated_molecule_content = read_molecule_content(out_sdf)
     molecule_fmt = out_sdf.split('.')[-1]
@@ -237,17 +233,17 @@ def compress(output_fnames, name):
     return archive_path
-def generate(in_fragments, in_protein, n_steps, n_atoms, radio_samples, selected_atoms):
     if in_fragments is None:
         return [None, None, None, None]
     if in_protein is None:
-        return generate_without_pocket(in_fragments, n_steps, n_atoms, radio_samples, selected_atoms)
     else:
-        return generate_with_pocket(in_fragments, in_protein, n_steps, n_atoms, radio_samples, selected_atoms)
-def generate_without_pocket(input_file, n_steps, n_atoms, radio_samples, selected_atoms):
     # Parsing selected atoms (javascript output)
     selected_atoms = selected_atoms.strip()
     if selected_atoms == '':
@@ -310,8 +306,8 @@ def generate_without_pocket(input_file, n_steps, n_atoms, radio_samples, selecte
         'fragment_mask': torch.tensor(fragment_mask, dtype=const.TORCH_FLOAT, device=device),
         'linker_mask': torch.tensor(linker_mask, dtype=const.TORCH_FLOAT, device=device),
         'num_atoms': len(positions),
-    }] * N_SAMPLES
-    dataloader = get_dataloader(dataset, batch_size=N_SAMPLES, collate_fn=collate_with_fragment_edges)
     print('Created dataloader')
     ddpm.edm.T = n_steps
@@ -333,26 +329,33 @@ def generate_without_pocket(input_file, n_steps, n_atoms, radio_samples, selecte
     for data in dataloader:
         try:
-            generate_linkers(ddpm=ddpm, data=data, sample_fn=sample_fn, name=name, with_pocket=False)
         except Exception as e:
             e = str(e).replace('\'', '')
             error = f'Caught exception while generating linkers: {e}'
             msg = output.ERROR_FORMAT_MSG.format(message=error)
             return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
-    out_files = try_to_convert_to_sdf(name)
     out_files = [inp_sdf] + out_files
     out_files = [compress(out_files, name=name)] + out_files
     return [
-        draw_sample(radio_samples, out_files),
         out_files,
-        gr.Radio.update(visible=True),
         None
     ]
-def generate_with_pocket(in_fragments, in_protein, n_steps, n_atoms, radio_samples, selected_atoms):
     # Parsing selected atoms (javascript output)
     selected_atoms = selected_atoms.strip()
     if selected_atoms == '':
@@ -443,11 +446,11 @@ def generate_with_pocket(in_fragments, in_protein, n_steps, n_atoms, radio_sampl
         'fragment_mask': torch.tensor(fragment_mask, dtype=const.TORCH_FLOAT, device=device),
         'linker_mask': torch.tensor(linker_mask, dtype=const.TORCH_FLOAT, device=device),
         'num_atoms': len(positions),
-    }] * N_SAMPLES
     dataset = MOADDataset(data=dataset)
     ddpm.val_dataset = dataset
-    dataloader = get_dataloader(dataset, batch_size=N_SAMPLES, collate_fn=collate_with_fragment_edges)
     print('Created dataloader')
     ddpm.edm.T = n_steps
@@ -469,21 +472,28 @@ def generate_with_pocket(in_fragments, in_protein, n_steps, n_atoms, radio_sampl
     for data in dataloader:
         try:
-            generate_linkers(ddpm=ddpm, data=data, sample_fn=sample_fn, name=name, with_pocket=True)
         except Exception as e:
             e = str(e).replace('\'', '')
             error = f'Caught exception while generating linkers: {e}'
             msg = output.ERROR_FORMAT_MSG.format(message=error)
             return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
-    out_files = try_to_convert_to_sdf(name)
     out_files = [inp_sdf, inp_pdb] + out_files
     out_files = [compress(out_files, name=name)] + out_files
     return [
-        draw_sample(radio_samples, out_files),
         out_files,
-        gr.Radio.update(visible=True),
         None
     ]
@@ -516,6 +526,7 @@ with demo:
                     label="Linker Size: DiffLinker will predict it if set to 0",
                     step=1
                 )
                 examples = gr.Dataset(
                     components=[gr.File(visible=False), gr.File(visible=False)],
                     samples=[
@@ -524,7 +535,6 @@ with demo:
                         ['examples/3hz1_fragments.sdf', 'examples/3hz1_protein.pdb'],
                         ['examples/5ou2_fragments.sdf', 'examples/5ou2_protein.pdb'],
                     ],
-                    # headers=['Fragments', 'Target Protein'],
                     type='values',
                 )
@@ -537,13 +547,14 @@ with demo:
             with gr.Column():
                 gr.Markdown('## Visualization')
                 gr.Markdown('**Hint:** click on atoms to select anchor points (optionally)')
-                samples = gr.Radio(
-                    choices=['Sample 1', 'Sample 2', 'Sample 3', 'Sample 4', 'Sample 5'],
-                    value='Sample 1',
                     type='value',
-                    show_label=False,
                     visible=False,
                     interactive=True,
                 )
                 visualization = gr.HTML()
@@ -570,17 +581,17 @@ with demo:
     examples.click(
         fn=click_on_example,
         inputs=[examples],
-        outputs=[input_fragments_file, input_protein_file, n_steps, n_atoms, visualization, samples, hidden]
     )
     button.click(
         fn=generate,
-        inputs=[input_fragments_file, input_protein_file, n_steps, n_atoms, samples, hidden],
         outputs=[visualization, output_files, samples, hidden],
         _js=output.RETURN_SELECTION_JS,
     )
-    samples.change(
         fn=draw_sample,
-        inputs=[samples, output_files],
         outputs=[visualization],
     )
     demo.load(_js=output.STARTUP_JS)

 from src.datasets import get_dataloader, collate_with_fragment_edges, parse_molecule, MOADDataset
 from src.lightning import DDPM
 from src.linker_size_lightning import SizeClassifier
+from src.generation import generate_linkers, try_to_convert_to_sdf, get_pocket
 from zipfile import ZipFile
         vis = show_target(in_protein)
     elif in_fragments is not None and in_protein is not None:
         vis = show_fragments_and_target(in_fragments, in_protein)
+    return [vis, gr.Dropdown.update(choices=[], value=None, visible=False), None]
 def show_fragments(in_fragments):
     vis = ''
     if in_protein is not None:
         vis = show_target(in_protein)
+    return [None, vis, gr.Dropdown.update(choices=[], value=None, visible=False), None]
 def clear_protein_input(in_fragments):
     vis = ''
     if in_fragments is not None:
         vis = show_fragments(in_fragments)
+    return [None, vis, gr.Dropdown.update(choices=[], value=None, visible=False), None]
 def click_on_example(example):
     fragment_fname, target_fname = example
     fragment_path = f'examples/{fragment_fname}' if fragment_fname != '' else None
     target_path = f'examples/{target_fname}' if target_fname != '' else None
+    return [fragment_path, target_path] + show_input(fragment_path, target_path)
+def draw_sample(sample_path, out_files, num_samples):
+    with_protein = (len(out_files) == num_samples + 3)
     in_file = out_files[1]
     in_sdf = in_file if isinstance(in_file, str) else in_file.name
         input_target_content = read_molecule_content(in_pdb)
         target_fmt = in_pdb.split('.')[-1]
+    out_sdf = sample_path if isinstance(sample_path, str) else sample_path.name
     generated_molecule_content = read_molecule_content(out_sdf)
     molecule_fmt = out_sdf.split('.')[-1]
     return archive_path
+def generate(in_fragments, in_protein, n_steps, n_atoms, num_samples, selected_atoms):
     if in_fragments is None:
         return [None, None, None, None]
     if in_protein is None:
+        return generate_without_pocket(in_fragments, n_steps, n_atoms, num_samples, selected_atoms)
     else:
+        return generate_with_pocket(in_fragments, in_protein, n_steps, n_atoms, num_samples, selected_atoms)
+def generate_without_pocket(input_file, n_steps, n_atoms, num_samples, selected_atoms):
     # Parsing selected atoms (javascript output)
     selected_atoms = selected_atoms.strip()
     if selected_atoms == '':
         'fragment_mask': torch.tensor(fragment_mask, dtype=const.TORCH_FLOAT, device=device),
         'linker_mask': torch.tensor(linker_mask, dtype=const.TORCH_FLOAT, device=device),
         'num_atoms': len(positions),
+    }] * num_samples
+    dataloader = get_dataloader(dataset, batch_size=num_samples, collate_fn=collate_with_fragment_edges)
     print('Created dataloader')
     ddpm.edm.T = n_steps
     for data in dataloader:
         try:
+            generate_linkers(
+                ddpm=ddpm, data=data, num_samples=num_samples, sample_fn=sample_fn, name=name, with_pocket=False
+            )
         except Exception as e:
             e = str(e).replace('\'', '')
             error = f'Caught exception while generating linkers: {e}'
             msg = output.ERROR_FORMAT_MSG.format(message=error)
             return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
+    out_files = try_to_convert_to_sdf(name, num_samples)
     out_files = [inp_sdf] + out_files
     out_files = [compress(out_files, name=name)] + out_files
+    choice = out_files[2]
     return [
+        draw_sample(choice, out_files, num_samples),
         out_files,
+        gr.Dropdown.update(
+            choices=out_files[2:],
+            value=choice,
+            visible=True,
+        ),
         None
     ]
+def generate_with_pocket(in_fragments, in_protein, n_steps, n_atoms, num_samples, selected_atoms):
     # Parsing selected atoms (javascript output)
     selected_atoms = selected_atoms.strip()
     if selected_atoms == '':
         'fragment_mask': torch.tensor(fragment_mask, dtype=const.TORCH_FLOAT, device=device),
         'linker_mask': torch.tensor(linker_mask, dtype=const.TORCH_FLOAT, device=device),
         'num_atoms': len(positions),
+    }] * num_samples
     dataset = MOADDataset(data=dataset)
     ddpm.val_dataset = dataset
+    dataloader = get_dataloader(dataset, batch_size=num_samples, collate_fn=collate_with_fragment_edges)
     print('Created dataloader')
     ddpm.edm.T = n_steps
     for data in dataloader:
         try:
+            generate_linkers(
+                ddpm=ddpm, data=data, num_samples=num_samples, sample_fn=sample_fn, name=name, with_pocket=True
+            )
         except Exception as e:
             e = str(e).replace('\'', '')
             error = f'Caught exception while generating linkers: {e}'
             msg = output.ERROR_FORMAT_MSG.format(message=error)
             return [output.IFRAME_TEMPLATE.format(html=msg), None, None, None]
+    out_files = try_to_convert_to_sdf(name, num_samples)
     out_files = [inp_sdf, inp_pdb] + out_files
     out_files = [compress(out_files, name=name)] + out_files
+    choice = out_files[3]
     return [
+        draw_sample(choice, out_files, num_samples),
         out_files,
+        gr.Dropdown.update(
+            choices=out_files[3:],
+            value=choice,
+            visible=True,
+        ),
         None
     ]
                     label="Linker Size: DiffLinker will predict it if set to 0",
                     step=1
                 )
+                n_samples = gr.Slider(minimum=5, maximum=50, label="Number of Samples", step=5)
                 examples = gr.Dataset(
                     components=[gr.File(visible=False), gr.File(visible=False)],
                     samples=[
                         ['examples/3hz1_fragments.sdf', 'examples/3hz1_protein.pdb'],
                         ['examples/5ou2_fragments.sdf', 'examples/5ou2_protein.pdb'],
                     ],
                     type='values',
                 )
             with gr.Column():
                 gr.Markdown('## Visualization')
                 gr.Markdown('**Hint:** click on atoms to select anchor points (optionally)')
+                samples = gr.Dropdown(
+                    choices=[],
+                    value=None,
                     type='value',
+                    multiselect=False,
                     visible=False,
                     interactive=True,
+                    label='Samples'
                 )
                 visualization = gr.HTML()
     examples.click(
         fn=click_on_example,
         inputs=[examples],
+        outputs=[input_fragments_file, input_protein_file, visualization, samples, hidden]
     )
     button.click(
         fn=generate,
+        inputs=[input_fragments_file, input_protein_file, n_steps, n_atoms, n_samples, hidden],
         outputs=[visualization, output_files, samples, hidden],
         _js=output.RETURN_SELECTION_JS,
     )
+    samples.select(
         fn=draw_sample,
+        inputs=[samples, output_files, n_samples],
         outputs=[visualization],
     )
     demo.load(_js=output.STARTUP_JS)

output.py CHANGED Viewed

@@ -365,7 +365,7 @@ STARTUP_JS = """
 """
 RETURN_SELECTION_JS = """
-(input_file, input_protein_file, n_steps, n_atoms, samples, hidden) => {
     let selected = []
     for (const [atom, add] of Object.entries(window.selected_elements)) {
       if (add) {
@@ -378,6 +378,6 @@ RETURN_SELECTION_JS = """
       }
     }
     console.log("Finished parsing");
-    return [input_file, input_protein_file, n_steps, n_atoms, samples, selected.join(",")];
 }
 """

 """
 RETURN_SELECTION_JS = """
+(input_file, input_protein_file, n_steps, n_atoms, n_samples, hidden) => {
     let selected = []
     for (const [atom, add] of Object.entries(window.selected_elements)) {
       if (add) {
       }
     }
     console.log("Finished parsing");
+    return [input_file, input_protein_file, n_steps, n_atoms, n_samples, selected.join(",")];
 }
 """

src/generation.py CHANGED Viewed

@@ -9,10 +9,8 @@ from src.visualizer import save_xyz_file
 from src.utils import FoundNaNException
 from src.datasets import get_one_hot
-N_SAMPLES = 5
-def generate_linkers(ddpm, data, sample_fn, name, with_pocket=False):
     chain = node_mask = None
     for i in range(5):
         try:
@@ -39,14 +37,14 @@ def generate_linkers(ddpm, data, sample_fn, name, with_pocket=False):
     if with_pocket:
         node_mask[torch.where(data['pocket_mask'])] = 0
-    names = [f'output_{i + 1}_{name}' for i in range(N_SAMPLES)]
     save_xyz_file('results', h, x, node_mask, names=names, is_geom=True, suffix='')
     print('Saved XYZ files')
-def try_to_convert_to_sdf(name):
     out_files = []
-    for i in range(N_SAMPLES):
         out_xyz = f'results/output_{i + 1}_{name}_.xyz'
         out_sdf = f'results/output_{i + 1}_{name}_.sdf'
         subprocess.run(f'obabel {out_xyz} -O {out_sdf}', shell=True)

 from src.utils import FoundNaNException
 from src.datasets import get_one_hot
+def generate_linkers(ddpm, data, num_samples, sample_fn, name, with_pocket=False):
     chain = node_mask = None
     for i in range(5):
         try:
     if with_pocket:
         node_mask[torch.where(data['pocket_mask'])] = 0
+    names = [f'output_{i + 1}_{name}' for i in range(num_samples)]
     save_xyz_file('results', h, x, node_mask, names=names, is_geom=True, suffix='')
     print('Saved XYZ files')
+def try_to_convert_to_sdf(name, num_samples):
     out_files = []
+    for i in range(num_samples):
         out_xyz = f'results/output_{i + 1}_{name}_.xyz'
         out_sdf = f'results/output_{i + 1}_{name}_.sdf'
         subprocess.run(f'obabel {out_xyz} -O {out_sdf}', shell=True)