Spaces:
Running on Zero
Running on Zero
| """Gradio app for Caliby sequence design.""" | |
| import base64 | |
| from pathlib import Path | |
| import gradio as gr | |
| # Eagerly import so the wandb/pydantic init runs in the main thread | |
| # (where sys.modules['__main__'] exists), not in a Gradio worker thread. | |
| import caliby.data.preprocessing.atomworks.clean_pdbs # noqa: F401 | |
| from design import design_sequences | |
| from file_utils import _get_file_path, _write_zip_from_paths | |
| from viewers import ( | |
| _csv_download_output, | |
| _file_output, | |
| _format_results_display, | |
| _get_best_sc_sample, | |
| _render_af2_viewer, | |
| _update_viewers, | |
| ) | |
| def _get_upload_instructions(mode: str) -> str: | |
| if mode == "none": | |
| return "Upload a single PDB or CIF file." | |
| elif mode == "synthetic": | |
| return "Upload a single PDB or CIF file. Conformers will be generated automatically." | |
| else: | |
| return "Upload all PDB files — primary conformer first, then additional conformers." | |
def _clean_uploaded_pdbs(pdb_files: list | None):
    """Clean the uploaded structure files and build the matching UI updates.

    Returns a 4-tuple in the order the upload handler wires its outputs:
    the cleaned file paths (``None`` when nothing was uploaded), an update for
    the notification text, an update for the cleaned-files download, and an
    update toggling whether the submit button is interactive.
    """
    if not pdb_files:
        # Nothing to clean: clear the state, hide both widgets, lock the submit button.
        return (
            None,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(interactive=False),
        )

    # Deferred import: only needed once there are files to clean.
    from caliby import clean_pdbs

    uploaded_paths = [str(_get_file_path(upload)) for upload in pdb_files]
    cleaned_paths = clean_pdbs(uploaded_paths)
    archive_path = _write_zip_from_paths(cleaned_paths, "cleaned_pdbs", ".zip")

    notice = (
        "**Note:** Your files have been cleaned and standardized to mmCIF format "
        "to avoid downstream parsing and alignment issues. "
        "If you plan to use positional constraints, please download the cleaned files and double "
        "check the new residue indices."
    )
    notification_update = gr.update(value=notice, visible=True)
    download_update = gr.update(value=archive_path, visible=True)
    submit_update = gr.update(interactive=True)
    return cleaned_paths, notification_update, download_update, submit_update
def _reset_cleaned_state():
    """Return the updates that discard any previously cleaned upload.

    The tuple clears the cleaned-files state, hides the notification and the
    download widget, and makes the submit button non-interactive.
    """
    cleared_state = None
    hide_notification = gr.update(visible=False)
    hide_download = gr.update(visible=False)
    lock_submit = gr.update(interactive=False)
    return cleared_state, hide_notification, hide_download, lock_submit
def submit_design_sequences(
    cleaned_files: list[str] | None,
    ensemble_mode: str,
    model_variant: str,
    num_seqs: int,
    omit_aas: list[str] | None,
    temperature: float,
    fixed_pos_seq: str,
    fixed_pos_scn: str,
    fixed_pos_override_seq: str,
    pos_restrict_aatype: str,
    symmetry_pos: str,
    num_protpardelle_conformers: int,
    run_af2_eval: bool = False,
):
    """Run the design pipeline and turn its outputs into UI updates.

    All arguments are forwarded to ``design_sequences`` (``cleaned_files`` is
    passed as ``pdb_files``). The return value is a 14-tuple whose order matches
    the ``outputs`` list of the submit handler: results header, results table,
    raw dataframe, FASTA text, the two download widgets, the AF2 / input
    structure states, the best sample, the viewer section visibility, the AF2
    viewer HTML, and finally updates that clear the reference viewer and hide the
    reference section and the results placeholder.
    """
    design_kwargs = {
        "pdb_files": cleaned_files,
        "ensemble_mode": ensemble_mode,
        "model_variant": model_variant,
        "num_seqs": num_seqs,
        "omit_aas": omit_aas,
        "temperature": temperature,
        "fixed_pos_seq": fixed_pos_seq,
        "fixed_pos_scn": fixed_pos_scn,
        "fixed_pos_override_seq": fixed_pos_override_seq,
        "pos_restrict_aatype": pos_restrict_aatype,
        "symmetry_pos": symmetry_pos,
        "num_protpardelle_conformers": num_protpardelle_conformers,
        "run_af2_eval": run_af2_eval,
    }
    (
        df,
        fasta_text,
        out_zip_path,
        sc_zip_path,
        af2_pdb_data,
        input_pdb_data,
    ) = design_sequences(**design_kwargs)

    # The AF2 viewer is only populated when the pipeline returned AF2 structures.
    if af2_pdb_data:
        has_af2 = True
        best_sample = _get_best_sc_sample(df)
        af2_html = _render_af2_viewer(best_sample, af2_pdb_data)
    else:
        has_af2 = False
        best_sample = ""
        af2_html = ""

    header_update = gr.update(visible=True)
    table_update = gr.update(value=_format_results_display(df), visible=True)
    fasta_update = gr.update(value=fasta_text, visible=True)
    cif_download = _file_output(out_zip_path)
    sc_download = _file_output(sc_zip_path)
    viewer_section_update = gr.update(visible=has_af2)
    # Each new run starts with the reference overlay cleared and hidden.
    reference_viewer_update = gr.update(value="", visible=False)
    reference_section_update = gr.update(visible=False)
    placeholder_update = gr.update(visible=False)

    return (
        header_update,
        table_update,
        df,
        fasta_update,
        cif_download,
        sc_download,
        af2_pdb_data,
        input_pdb_data,
        best_sample,
        viewer_section_update,
        af2_html,
        reference_viewer_update,
        reference_section_update,
        placeholder_update,
    )
# App theme: start from Gradio's Base theme with amber/orange hues and a custom
# font stack, then override individual text, block, button and checkbox variables.
theme = gr.themes.Base(
    font=[gr.themes.GoogleFont("Instrument Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
    radius_size="lg",
    primary_hue="amber",
    secondary_hue="orange",
)
theme = theme.set(
    # Body text colours.
    body_text_color="*neutral_700",
    body_text_color_dark="*neutral_300",
    body_text_color_subdued="*neutral_500",
    # Block titles, info text, borders and padding.
    block_title_text_color="*neutral_700",
    block_info_text_color="*neutral_500",
    block_border_width_dark="0px",
    block_padding="*spacing_xl calc(*spacing_xl + 3px)",
    block_label_border_width_dark="0px",
    block_label_padding="*spacing_md *spacing_lg",
    # Dark-mode tweaks for secondary buttons and checkbox labels.
    button_secondary_background_fill_dark="*neutral_600",
    checkbox_label_text_color_dark="*neutral_100",
)
# Custom stylesheet passed to the app at launch. It styles:
#   - `.loading-pulse`: the pulsing "running" message shown while a design is in progress
#   - `.omit-aa-dropdown`: caps the height of the amino-acid dropdown list
#   - `.compact-file`: shrinks the file download widgets
#   - `#results-table`: fixes the width of the second column and lets its cells scroll
#     horizontally instead of wrapping
#   - `#af2-viewer` / `#ref-viewer`: centres the structure viewers and bounds their iframes
css = """
.loading-pulse { animation: pulse 2.5s ease-in-out infinite; }
@keyframes pulse { 0%, 100% { opacity: 1; } 50% { opacity: 0.3; } }
.omit-aa-dropdown ul { max-height: 200px !important; overflow-y: auto; }
.compact-file .large { min-height: 50px !important; }
#results-table th:nth-child(2),
#results-table td:nth-child(2) {
max-width: 28rem;
width: 28rem;
}
#results-table td:nth-child(2) {
overflow: hidden;
}
#results-table td:nth-child(2) > div {
display: block;
max-width: 100%;
overflow-x: auto;
overflow-y: hidden;
white-space: nowrap !important;
scrollbar-width: thin;
}
#af2-viewer, #ref-viewer {
display: flex;
justify-content: center;
}
#af2-viewer iframe, #ref-viewer iframe {
max-width: 100%;
}
"""
# Base64 text of the logo PNG that sits next to this file; it is embedded in the
# page header as a data URI, so the image is read once at import time.
_LOGO_B64 = base64.b64encode(
    (Path(__file__).parent / "caliby_transparent.png").read_bytes()
).decode("ascii")
# Build the Gradio UI: a header, a row with an input column (scale=1) and a results
# column (scale=2), followed by the event wiring between them.
# NOTE(review): the nesting of the layout contexts was reconstructed from a flattened
# listing. Confirm in particular that `num_protpardelle_conformers` belongs inside the
# "Advanced constraints" accordion and that `ref_section` nests inside `viewer_section`.
with gr.Blocks(title="Caliby - Protein Sequence Design") as demo:
    # Page header: inline (base64) logo next to the title.
    gr.HTML(
        '<div style="display: flex; align-items: center; gap: 16px;">'
        f'<img src="data:image/png;base64,{_LOGO_B64}" alt="Caliby logo" style="height: 80px;">'
        '<h1 style="margin: 0;">Caliby - Protein Sequence Design</h1>'
        '</div>'
    )
    with gr.Row():
        # ---- Input column: model choice, upload, sampling options ----
        with gr.Column(scale=1):
            model_variant = gr.Radio(
                choices=[
                    ("Caliby", "caliby"),
                    ("SolubleCaliby v1", "soluble_caliby_v1"),
                ],
                value="caliby",
                label="Model",
            )
            ensemble_mode = gr.Radio(
                choices=[
                    ("Fixed backbone", "none"),
                    ("Synthetic ensemble", "synthetic"),
                    ("Upload your own ensemble", "user"),
                ],
                value="synthetic",
                label="Ensemble mode",
            )
            run_af2_eval = gr.Checkbox(
                label="Run AF2 self-consistency evaluation",
                value=False,
                info="Refold designed sequences with AlphaFold2 and compute scRMSD, pLDDT, and TM-score",
            )
            # Initial help text matches the default ensemble mode ("synthetic").
            upload_instructions = gr.Markdown(
                _get_upload_instructions("synthetic"),
            )
            pdb_input = gr.File(
                file_count="multiple",
                label="PDB/CIF file(s)",
                file_types=[".pdb", ".cif"],
            )
            finish_upload_btn = gr.Button("Upload", variant="secondary")
            # Holds the cleaned file paths produced by `_clean_uploaded_pdbs`.
            cleaned_files_state = gr.State(None)
            clean_notification = gr.Markdown(visible=False)
            clean_download = gr.File(
                label="Download cleaned files", visible=False, elem_classes=["compact-file"]
            )
            num_seqs = gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                step=1,
                label="Number of sequences",
            )
            # One-letter codes of the 20 standard amino acids.
            omit_aas = gr.Dropdown(
                choices=[
                    "A",
                    "C",
                    "D",
                    "E",
                    "F",
                    "G",
                    "H",
                    "I",
                    "K",
                    "L",
                    "M",
                    "N",
                    "P",
                    "Q",
                    "R",
                    "S",
                    "T",
                    "V",
                    "W",
                    "Y",
                ],
                multiselect=True,
                label="Amino acids to omit",
                elem_classes=["omit-aa-dropdown"],
            )
            temperature = gr.Slider(
                minimum=0.01,
                maximum=1,
                value=0.01,
                step=0.01,
                label="Sampling temperature",
            )
            # Starts disabled; enabled once uploaded files have been cleaned.
            submit_btn = gr.Button("Design sequences", variant="primary", interactive=False)
            with gr.Accordion("Advanced constraints", open=False):
                fixed_pos_seq = gr.Textbox(
                    label="Fixed positions",
                    info="Format: A1-100,B1-100 \nSequence positions in the input PDB to condition on so that they"
                    " remain fixed during design. For ensemble-conditioned design, fixed_pos_seq is applied using"
                    " the primary conformer's sequence.",
                    placeholder="e.g. A1-100,B1-100",
                )
                fixed_pos_scn = gr.Textbox(
                    label="Fixed sidechain positions",
                    info="Format: A1-10,A12,A15-20 \nSidechain positions in the input PDB to condition on so that they"
                    " remain fixed during design. Note that fixed sidechain positions must be a subset of fixed"
                    " sequence positions, since it does not make sense to condition on a sidechain without also"
                    " conditioning on its sequence identity.",
                    placeholder="e.g. A1-10,A12,A15-20",
                )
                fixed_pos_override_seq = gr.Textbox(
                    label="Override sequence at positions",
                    info="Format: A26:A,A27:L \nSequence positions in the input PDB to first override the sequence at,"
                    " and then condition on. The colon separates the position and the desired amino acid.",
                    placeholder="e.g. A26:A,A27:L",
                )
                pos_restrict_aatype = gr.Textbox(
                    label="Position restrictions",
                    info="Format: A26:AVG,A27:VG \nAllowed amino acids for certain positions in the input PDB. The"
                    " colon separates the position and the allowed amino acids.",
                    placeholder="e.g. A26:AVG,A27:VG",
                )
                symmetry_pos = gr.Textbox(
                    label="Symmetry positions",
                    info="Format: A10,B10,C10|A11,B11,C11 \nSymmetry positions for tying sampling across residue"
                    " positions. The pipe separates groups of positions to sample symmetrically. In the example,"
                    " A10, B10, and C10 are tied together, and A11, B11, and C11 are tied together.",
                    placeholder="e.g. A10,B10,C10|A11,B11,C11",
                )
                # Shown only in "synthetic" ensemble mode (see `ensemble_mode.change` below).
                # NOTE(review): placement inside the accordion is a reconstruction — confirm.
                num_protpardelle_conformers = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=15,
                    step=1,
                    label="Number of conformers to generate",
                    visible=True,
                )
        # ---- Results column: table, FASTA, downloads, structure viewers ----
        with gr.Column(scale=2):
            # Per-session state populated by `submit_design_sequences`.
            raw_results_df = gr.State(None)
            af2_pdb_state = gr.State({})
            input_pdb_state = gr.State({})
            best_sample_state = gr.State("")
            # Doubles as the loading indicator while a design run is in progress.
            results_placeholder = gr.Markdown(
                "Results will appear here after designing sequences.",
            )
            results_header = gr.Markdown("### Results", visible=False)
            results_df = gr.Dataframe(
                show_label=False,
                interactive=False,
                wrap=False,
                column_widths=[160, 448],
                elem_id="results-table",
                visible=False,
            )
            fasta_output = gr.Textbox(
                label="Sequences (FASTA)",
                lines=10,
                visible=False,
            )
            with gr.Row():
                csv_download = gr.File(label="Download results CSV", elem_classes=["compact-file"], visible=False)
                output_files = gr.File(label="Download CIF files", elem_classes=["compact-file"], visible=False)
                sc_output_files = gr.File(
                    label="Download AF2 self-consistency outputs",
                    elem_classes=["compact-file"],
                    visible=False,
                )
            # Hidden until a run returns AF2 structures.
            with gr.Column(visible=False) as viewer_section:
                gr.Markdown("---")
                with gr.Row():
                    gr.Markdown("### AF2 Prediction")
                    af2_color_mode = gr.Dropdown(
                        choices=[
                            ("pLDDT", "plddt"),
                            ("Chain", "chain"),
                            ("Rainbow", "rainbow"),
                            ("Secondary structure", "secondary"),
                        ],
                        value="plddt",
                        label="Color by",
                        scale=0,
                    )
                af2_viewer = gr.HTML(elem_id="af2-viewer")
                show_overlay = gr.Checkbox(label="Show reference structure", value=False)
                # NOTE(review): nesting of ref_section under viewer_section is a reconstruction — confirm.
                with gr.Column(visible=False) as ref_section:
                    with gr.Row():
                        gr.Markdown("### Reference Structure")
                        ref_color_mode = gr.Dropdown(
                            choices=[
                                ("Chain", "chain"),
                                ("pLDDT", "plddt"),
                                ("Rainbow", "rainbow"),
                                ("Secondary structure", "secondary"),
                            ],
                            value="chain",
                            label="Color by",
                            scale=0,
                        )
                    reference_viewer = gr.HTML(elem_id="ref-viewer")
    # ---- Event wiring ----
    # Submit: first swap the placeholder for a pulsing "running" message, then run the
    # pipeline. The outputs list must stay in the same order as the tuple returned by
    # `submit_design_sequences`.
    submit_btn.click(
        fn=lambda: gr.update(value='<div class="loading-pulse">Running design pipeline\u2026</div>', visible=True),
        outputs=[results_placeholder],
    ).then(
        fn=submit_design_sequences,
        inputs=[
            cleaned_files_state,
            ensemble_mode,
            model_variant,
            num_seqs,
            omit_aas,
            temperature,
            fixed_pos_seq,
            fixed_pos_scn,
            fixed_pos_override_seq,
            pos_restrict_aatype,
            symmetry_pos,
            num_protpardelle_conformers,
            run_af2_eval,
        ],
        outputs=[
            results_header,
            results_df,
            raw_results_df,
            fasta_output,
            output_files,
            sc_output_files,
            af2_pdb_state,
            input_pdb_state,
            best_sample_state,
            viewer_section,
            af2_viewer,
            reference_viewer,
            ref_section,
            results_placeholder,
        ],
    )
    # Refresh the CSV download whenever the raw results state changes.
    raw_results_df.change(fn=_csv_download_output, inputs=[raw_results_df], outputs=[csv_download])
    # Upload: disable the button while cleaning runs, clean the files, then restore the button.
    finish_upload_btn.click(
        fn=lambda: gr.update(value="Processing\u2026", interactive=False),
        outputs=[finish_upload_btn],
    ).then(
        fn=_clean_uploaded_pdbs,
        inputs=[pdb_input],
        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
    ).then(
        fn=lambda: gr.update(value="Upload", interactive=True),
        outputs=[finish_upload_btn],
    )
    # Changing the selected files invalidates any previously cleaned upload.
    pdb_input.change(
        fn=_reset_cleaned_state,
        outputs=[cleaned_files_state, clean_notification, clean_download, submit_btn],
    )
    # The conformer-count slider only applies to synthetic ensembles; the help text follows the mode.
    ensemble_mode.change(
        fn=lambda mode: (gr.update(visible=(mode == "synthetic")), _get_upload_instructions(mode)),
        inputs=[ensemble_mode],
        outputs=[num_protpardelle_conformers, upload_instructions],
    )
    # All three viewer controls re-render through the same handler with the same inputs/outputs.
    viewer_inputs = [best_sample_state, af2_pdb_state, input_pdb_state, show_overlay, af2_color_mode, ref_color_mode]
    viewer_outputs = [af2_viewer, reference_viewer, ref_section]
    show_overlay.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
    af2_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
    ref_color_mode.change(fn=_update_viewers, inputs=viewer_inputs, outputs=viewer_outputs)
if __name__ == "__main__":
    # Start the app only when this file is run as a script; the theme and the
    # stylesheet are supplied at launch time and SSR mode is turned off.
    launch_options = {"theme": theme, "css": css, "ssr_mode": False}
    demo.launch(**launch_options)