Spaces:

Cpt-Nemo
/

Boltz_Interface

Sleeping

App Files

xet

Community

Cpt-Nemo committed on Sep 1

Commit

c0985d1

verified ·

1 Parent(s): 758f213

Update boltz_gradio.py

Browse files

Files changed (1) hide show

boltz_gradio.py +73 -37

boltz_gradio.py CHANGED Viewed

@@ -9,6 +9,7 @@ import plotly.graph_objects as go
 from yaml import safe_dump, safe_load
 from rdkit import Chem, RDLogger
 from rdkit.Chem import AllChem, Descriptors
 from rdkit.Geometry import Point3D
 from rdkit.Chem.rdDetermineBonds import DetermineConnectivity
 from rdkit.Contrib.SA_Score import sascorer # type: ignore
@@ -56,10 +57,10 @@ property_functions = {'Molecular Weight'  : Descriptors.MolWt,
                       'Formal Charge'  : lambda mol: sum([atom.GetFormalCharge() for atom in mol.GetAtoms()]),
                       'Num. of Heavy Atoms' : Descriptors.HeavyAtomCount,
                       'Num. of Atoms'  : lambda mol: mol.GetNumAtoms(),
-                      'Molar Refractivity'  : Descriptors.MolMR,
-                      'Quantitative Estimate of Drug-Likeness (QED)' : Descriptors.qed,
-                      'Natural Product-likeness Score (NP)': partial(npscorer.scoreMol, fscore=fscore),
-                      'Synthetic Accessibility Score (SA)': sascorer.calculateScore}
 file_extract_matching_map = {'Structure' : ['.cif', '.sdf', '_bust.csv'],
                              'Confidence': ['confidence_'],
@@ -351,8 +352,8 @@ def __extract_cif_ca_coord(cif_f: str, get_weight: bool=True):
                         if i in backbone_idx]
         bb_coords_conf = np.array(bb_coords_conf, float)
         conf = bb_coords_conf[:, -1]/100
-        thres = 0.4
-        return bb_coords_conf[:, :3], mmcif_dict, (np.maximum(conf-thres, 0.05) / (1-thres)) ** 2
     else:
         bb_coords = [[x, y, z] for i, (x, y, z) in enumerate(zip(mmcif_dict['_atom_site.Cartn_x'],
                                                                  mmcif_dict['_atom_site.Cartn_y'],
@@ -446,8 +447,10 @@ def execute_single_boltz(file_name: str, yaml_str: str,
     yield gr.update(value='Predicting...', interactive=False), ''
     full_output = ''
     curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                            text=True, encoding="utf-8")
     for line in iter(curr_running_process.stdout.readline, ''):
         if 'The loaded checkpoint was produced with' in line or\
             'You are using a CUDA device' in line:  # Just skip these warnings
@@ -515,8 +518,10 @@ def execute_multi_boltz(all_files: list[str],
     yield gr.update(value='Predicting...', interactive=False), ''
     full_output = ''
     curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                            text=True, encoding="utf-8")
     for line in iter(curr_running_process.stdout.readline, ''):
         if 'The loaded checkpoint was produced with' in line or\
             'You are using a CUDA device' in line:
@@ -607,11 +612,14 @@ def execute_vhts_boltz(file_prefix: str, all_ligands: pd.DataFrame,
             f.write(safe_dump(yaml_template_dict))
         # execute on only a single file to retrieve msa, prevent colabfold server overload
         if idx == 0:
             yield gr.update(value='Predicting...', interactive=False), ''
             full_output = ''
             curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                                    text=True, encoding="utf-8")
             for line in iter(curr_running_process.stdout.readline, ''):
                 if 'The loaded checkpoint was produced with' in line or\
                     'You are using a CUDA device' in line:  # Just skip these warnings
@@ -639,7 +647,7 @@ def execute_vhts_boltz(file_prefix: str, all_ligands: pd.DataFrame,
     cmd[6] = str(devices)   # replace the "devices" param back to user-defined value
     curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                            text=True, encoding="utf-8")
     for line in iter(curr_running_process.stdout.readline, ''):
         if 'The loaded checkpoint was produced with' in line or\
             'You are using a CUDA device' in line:
@@ -711,9 +719,11 @@ def _process_single_chem_file(chem_f: str):
         n = __check_smi_title_line(chem_f)
         mols = Chem.MultithreadedSmilesMolSupplier(chem_f, titleLine=n)
     names, smiles = [], []
     for mol in mols:
         if mol is None:
             continue
         if mol.HasProp('_Name'):
             name = mol.GetProp('_Name')
         else:
@@ -753,6 +763,7 @@ def _process_tabular_files(chem_f: list[str], name_col: str, chem_col: str, deli
     except:
         return [], []
     final_names, final_smiles = [], []
     for _, row in df.iterrows():
         name = row[name_col]
         chem_str = row[chem_col]
@@ -761,6 +772,7 @@ def _process_tabular_files(chem_f: list[str], name_col: str, chem_col: str, deli
         else:
             mol = Chem.MolFromSmiles(chem_str)
         if mol is not None:
             smi = Chem.MolToSmiles(mol)
             final_names.append(name)
             final_smiles.append(smi)
@@ -1695,6 +1707,13 @@ def draw_smiles_3d(smiles_str: str):
             if isinstance(v, float):
                 v = round(v, 4)
             data_dict['Value'].append(v)
         yield get_mol_molstar_html(''), gr.update(value=pd.DataFrame(data_dict))
         new_mol = rdkit_embed_with_timeout(mol, 60)
         if new_mol is None:
@@ -2361,6 +2380,7 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
             @gr.render(inputs=vhts_entity_number)
             def vhts_append_new_entity(counts: int):
                 component_refs = []
                 for i in range(counts):
                     gr.Markdown(f'<span style="font-size:15px; font-weight:bold;">Entity {i+1}</span>', key=f'MK_{i}')
@@ -2394,11 +2414,16 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
                                                                     elem_classes='validation',
                                                                     show_legend=True)
                         with gr.Column(key=f'Entity_{i}_sub3', scale=1):
-                            cyclic_ckbox = gr.Checkbox(False, label='Cyclic', key=f'vhts_Cyclic_{i}')
                             modification_text = gr.Text(label='Modifications (Residue:CCD)',
-                                                        placeholder='2:ALY,15:MSE', key=f'vhts_Mod_{i}')
                         component_refs.extend([entity_menu, chain_name_text, sequence_text,
-                                               cyclic_ckbox, modification_text])
                         entity_menu.change(change_sequence_label,
                                            inputs=[entity_menu, sequence_text, cyclic_ckbox],
                                            outputs=[sequence_text, highlight_text, cyclic_ckbox])
@@ -2407,17 +2432,19 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
                                              outputs=highlight_text)
                     gr.HTML("<hr>")
-                chain_components = [comp for i, comp in enumerate(component_refs) if i % 5 == 1]
-                entity_components = [comp for i, comp in enumerate(component_refs) if i % 5 == 0]
-                for i, chain_input in enumerate(chain_components):
                     chain_input.submit(vhts_update_all_chains_dropdown,
                                        inputs=chain_components,
                                        outputs=[vhts_contact_1_dropdown, vhts_contact_2_dropdown,
                                                 vhts_target_chain_ids])
-                    entity_components[i].change(vhts_update_all_chains_dropdown,
-                                                inputs=chain_components,
-                                                outputs=[vhts_contact_1_dropdown, vhts_contact_2_dropdown,
-                                                         vhts_target_chain_ids])
                 def write_yaml_func(binder, target, pocket_max_d, pocket_f, aff_binder,
                                     cont_1_c, cont_1_r, cont_2_c, cont_2_r, contact_max_dist, contact_f,
@@ -2486,11 +2513,12 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
                         data_dict.update({'templates': all_templates})
                     existing_chains = []
-                    all_components += ['Ligand', binder, 'c1ccccc1', False, '']
-                    for i in range(0, len(all_components), 5):
-                        entity, chain, seq, cyclic, mod = all_components[i:i+5]
                         seq = seq.strip()
                         # set entity type
@@ -2504,36 +2532,36 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
                         if len(chains) == 1:
                             id = chain.strip()
                             if id in existing_chains:
-                                return f'Chain {id} of Entity {i//5+1} already existed!'
                             existing_chains.append(id)
                         else:
                             id = [c.strip() for c in chains]
                             for _i in id:
                                 if id.count(_i) > 1:
-                                    return f'Duplicate chain found within Entity {i//5+1}!'
                                 if _i in existing_chains:
-                                    return f'Chain {id} of Entity {i//5+1} already existed!'
                             existing_chains.extend(id)
                         # set key of sequence ('sequence', 'ccd' or 'smiles')
                         if not seq:
-                            return f'Entity {i//5+1} is empty!'
                         if entity == 'CCD':
-                            seq = seq.upper()
                             seq_key = 'ccd'
-                            if not re.fullmatch(r'(?:[A-Z0-9]{3}|[A-Z0-9]{5}|[A-Z]{2})', seq):
-                                return f'Entity {i//5+1} is not a valid CCD ID!'
                         elif entity == 'Ligand':
                             seq_key = 'smiles'
                             if Chem.MolFromSmiles(seq) is None:
-                                return f'Entity {i//5+1} is not a valid SMILES!'
                         else:
                             seq = seq.upper()
                             seq_key = 'sequence'
                             valid_strs = allow_char_dict[entity]
                             for char in seq:
                                 if char not in valid_strs:
-                                    return f'Entity {i//5+1} is not a valid {entity}!'
                         # set modification
                         if mod:
@@ -2541,7 +2569,7 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
                             all_mods = mod.split(',')
                             for pos_ccd in all_mods:
                                 if ':' not in pos_ccd:
-                                    return (f'Invalid modification for Entity {i//5+1}, please use ":" to '
                                             f'separate residue and CCD!\n')
                                 pos, ccd = pos_ccd.split(':')
                                 modifications.append({'position': int(pos), 'ccd': ccd})
@@ -2550,13 +2578,21 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as Interface:
                         if entity_type == 'ligand':
                             curr_dict = {entity_type: {'id'    : id,
-                                                       seq_key : seq,}
-                                         }
                         else:
                             curr_dict = {entity_type: {'id'    : id,
                                                        seq_key : seq.upper(),
-                                                       'cyclic': cyclic}
-                                         }
                         if modifications is not None:
                             curr_dict[entity_type]['modifications'] = modifications

 from yaml import safe_dump, safe_load
 from rdkit import Chem, RDLogger
 from rdkit.Chem import AllChem, Descriptors
+from rdkit.Chem.SaltRemover import SaltRemover
 from rdkit.Geometry import Point3D
 from rdkit.Chem.rdDetermineBonds import DetermineConnectivity
 from rdkit.Contrib.SA_Score import sascorer # type: ignore
                       'Formal Charge'  : lambda mol: sum([atom.GetFormalCharge() for atom in mol.GetAtoms()]),
                       'Num. of Heavy Atoms' : Descriptors.HeavyAtomCount,
                       'Num. of Atoms'  : lambda mol: mol.GetNumAtoms(),
+                      'Molar Refractivity'  : Descriptors.MolMR}
+property_functions_no_H = {'Quantitative Estimate of Drug-Likeness (QED)' : Descriptors.qed,
+                           'Natural Product-likeness Score (NP)': partial(npscorer.scoreMol, fscore=fscore),
+                           'Synthetic Accessibility Score (SA)': sascorer.calculateScore}
 file_extract_matching_map = {'Structure' : ['.cif', '.sdf', '_bust.csv'],
                              'Confidence': ['confidence_'],
                         if i in backbone_idx]
         bb_coords_conf = np.array(bb_coords_conf, float)
         conf = bb_coords_conf[:, -1]/100
+        thres = 0.5
+        return bb_coords_conf[:, :3], mmcif_dict, (np.maximum(conf-thres, 0.) / (1-thres)) ** 2
     else:
         bb_coords = [[x, y, z] for i, (x, y, z) in enumerate(zip(mmcif_dict['_atom_site.Cartn_x'],
                                                                  mmcif_dict['_atom_site.Cartn_y'],
     yield gr.update(value='Predicting...', interactive=False), ''
     full_output = ''
+    env = dict(os.environ)
+    env['NCCL_P2P_DISABLE'] = '1'
     curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                            text=True, encoding="utf-8", env=env)
     for line in iter(curr_running_process.stdout.readline, ''):
         if 'The loaded checkpoint was produced with' in line or\
             'You are using a CUDA device' in line:  # Just skip these warnings
     yield gr.update(value='Predicting...', interactive=False), ''
     full_output = ''
+    env = dict(os.environ)
+    env['NCCL_P2P_DISABLE'] = '1'
     curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                            text=True, encoding="utf-8", env=env)
     for line in iter(curr_running_process.stdout.readline, ''):
         if 'The loaded checkpoint was produced with' in line or\
             'You are using a CUDA device' in line:
             f.write(safe_dump(yaml_template_dict))
         # execute on only a single file to retrieve msa, prevent colabfold server overload
+        # This should work for custom MSA too (update)
         if idx == 0:
             yield gr.update(value='Predicting...', interactive=False), ''
             full_output = ''
+            env = dict(os.environ)
+            env['NCCL_P2P_DISABLE'] = '1'
             curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                                    text=True, encoding="utf-8", env=env)
             for line in iter(curr_running_process.stdout.readline, ''):
                 if 'The loaded checkpoint was produced with' in line or\
                     'You are using a CUDA device' in line:  # Just skip these warnings
     cmd[6] = str(devices)   # replace the "devices" param back to user-defined value
     curr_running_process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                                            text=True, encoding="utf-8", env=env)
     for line in iter(curr_running_process.stdout.readline, ''):
         if 'The loaded checkpoint was produced with' in line or\
             'You are using a CUDA device' in line:
         n = __check_smi_title_line(chem_f)
         mols = Chem.MultithreadedSmilesMolSupplier(chem_f, titleLine=n)
     names, smiles = [], []
+    remover = SaltRemover()
     for mol in mols:
         if mol is None:
             continue
+        mol = remover.StripMol(mol)
         if mol.HasProp('_Name'):
             name = mol.GetProp('_Name')
         else:
     except:
         return [], []
     final_names, final_smiles = [], []
+    remover = SaltRemover()
     for _, row in df.iterrows():
         name = row[name_col]
         chem_str = row[chem_col]
         else:
             mol = Chem.MolFromSmiles(chem_str)
         if mol is not None:
+            mol = remover.StripMol(mol)
             smi = Chem.MolToSmiles(mol)
             final_names.append(name)
             final_smiles.append(smi)
             if isinstance(v, float):
                 v = round(v, 4)
             data_dict['Value'].append(v)
+        mol = Chem.RemoveHs(mol)
+        data_dict['Property'].append(list(property_functions_no_H))
+        for func in property_functions_no_H.values():
+            v = func(mol)
+            if isinstance(v, float):
+                v = round(v, 4)
+            data_dict['Value'].append(v)
         yield get_mol_molstar_html(''), gr.update(value=pd.DataFrame(data_dict))
         new_mol = rdkit_embed_with_timeout(mol, 60)
         if new_mol is None:
             @gr.render(inputs=vhts_entity_number)
             def vhts_append_new_entity(counts: int):
+                component_cnt = 7
                 component_refs = []
                 for i in range(counts):
                     gr.Markdown(f'<span style="font-size:15px; font-weight:bold;">Entity {i+1}</span>', key=f'MK_{i}')
                                                                     elem_classes='validation',
                                                                     show_legend=True)
                         with gr.Column(key=f'Entity_{i}_sub3', scale=1):
+                            with gr.Row(key=f'Entity_{i}_sub3_group1_row1'):
+                                cyclic_ckbox = gr.Checkbox(False, label='Cyclic', min_width=50, key=f'Cyclic_{i}')
+                                msa_ckbox = gr.Checkbox(True, label='Use MSA', min_width=50, interactive=True,
+                                                        key=f'use_MSA_{i}')
                             modification_text = gr.Text(label='Modifications (Residue:CCD)',
+                                                        placeholder='2:ALY,15:MSE', key=f'Mod_{i}')
+                            msa_file = gr.File(label='MSA File', file_types=['.a3m', '.csv'], height=92,
+                                               elem_classes='small-upload-style', key=f'msa_file_{i}')
                         component_refs.extend([entity_menu, chain_name_text, sequence_text,
+                                               cyclic_ckbox, modification_text, msa_file, msa_ckbox])
                         entity_menu.change(change_sequence_label,
                                            inputs=[entity_menu, sequence_text, cyclic_ckbox],
                                            outputs=[sequence_text, highlight_text, cyclic_ckbox])
                                              outputs=highlight_text)
                     gr.HTML("<hr>")
+                chain_components = [comp for i, comp in enumerate(component_refs) if i % component_cnt <= 1]
+                entity_components = [comp for i, comp in enumerate(component_refs) if i % component_cnt == 0]
+                for i in range(0, len(chain_components), 2):
+                    chain_input = chain_components[i+1]
+                    entity_menu = entity_components[i//2]
                     chain_input.submit(vhts_update_all_chains_dropdown,
                                        inputs=chain_components,
                                        outputs=[vhts_contact_1_dropdown, vhts_contact_2_dropdown,
                                                 vhts_target_chain_ids])
+                    entity_menu.change(vhts_update_all_chains_dropdown,
+                                       inputs=chain_components,
+                                       outputs=[vhts_contact_1_dropdown, vhts_contact_2_dropdown,
+                                                vhts_target_chain_ids])
                 def write_yaml_func(binder, target, pocket_max_d, pocket_f, aff_binder,
                                     cont_1_c, cont_1_r, cont_2_c, cont_2_r, contact_max_dist, contact_f,
                         data_dict.update({'templates': all_templates})
                     existing_chains = []
+                    msa_rng_name = uuid.uuid4().hex[:8]
+                    all_components += ['Ligand', binder, 'c1ccccc1', False, '', '', False]
+                    for i in range(0, len(all_components), component_cnt):
+                        entity, chain, seq, cyclic, mod, msa_pth, use_msa = all_components[i:i+component_cnt]
                         seq = seq.strip()
                         # set entity type
                         if len(chains) == 1:
                             id = chain.strip()
                             if id in existing_chains:
+                                return f'Chain {id} of Entity {i//component_cnt+1} already existed!'
                             existing_chains.append(id)
                         else:
                             id = [c.strip() for c in chains]
                             for _i in id:
                                 if id.count(_i) > 1:
+                                    return f'Duplicate chain found within Entity {i//component_cnt+1}!'
                                 if _i in existing_chains:
+                                    return f'Chain {id} of Entity {i//component_cnt+1} already existed!'
                             existing_chains.extend(id)
                         # set key of sequence ('sequence', 'ccd' or 'smiles')
                         if not seq:
+                            return f'Entity {i//component_cnt+1} is empty!'
                         if entity == 'CCD':
                             seq_key = 'ccd'
+                            seq = seq.upper()
+                            if not re.fullmatch(r'(?:[A-Z0-9]{3}|[A-Z0-9]{5})|[A-Z]{2}', seq):
+                                return f'Entity {i//component_cnt+1} is not a valid CCD ID!'
                         elif entity == 'Ligand':
                             seq_key = 'smiles'
                             if Chem.MolFromSmiles(seq) is None:
+                                return f'Entity {i//component_cnt+1} is not a valid SMILES!'
                         else:
                             seq = seq.upper()
                             seq_key = 'sequence'
                             valid_strs = allow_char_dict[entity]
                             for char in seq:
                                 if char not in valid_strs:
+                                    return f'Entity {i//component_cnt+1} is not a valid {entity}!'
                         # set modification
                         if mod:
                             all_mods = mod.split(',')
                             for pos_ccd in all_mods:
                                 if ':' not in pos_ccd:
+                                    return (f'Invalid modification for Entity {i//component_cnt+1}, please use ":" to '
                                             f'separate residue and CCD!\n')
                                 pos, ccd = pos_ccd.split(':')
                                 modifications.append({'position': int(pos), 'ccd': ccd})
                         if entity_type == 'ligand':
                             curr_dict = {entity_type: {'id'    : id,
+                                                       seq_key : seq,}}
                         else:
                             curr_dict = {entity_type: {'id'    : id,
                                                        seq_key : seq.upper(),
+                                                       'cyclic': cyclic}}
+                        # Check for MSA
+                        if entity_type == 'protein':
+                            if msa_pth and use_msa:
+                                target_msa = os.path.join(msa_dir, msa_rng_name, os.path.basename(msa_pth))
+                                os.makedirs(os.path.dirname(target_msa), exist_ok=True)
+                                shutil.copy(msa_pth, target_msa)
+                                curr_dict[entity_type]['msa'] = target_msa
+                            elif not use_msa:
+                                curr_dict[entity_type]['msa'] = 'empty'
                         if modifications is not None:
                             curr_dict[entity_type]['modifications'] = modifications