| import html |
| import io |
| import random |
| import zipfile |
| import gradio as gr |
| import py3Dmol |
| import json |
| import os |
|
|
| from tempfile import TemporaryDirectory |
| from Bio.PDB import PDBList |
|
|
| from utils import * |
| from multiana import * |
| import datetime |
|
|
# Structure file rendered when the page first loads (see the demo.load hook).
default_file = "static/test.pdb"

# Scratch directory for generated artifacts: the standalone viewer page written
# by render_html and the zip archives written by multi_uniprot.
TEMP_DIR = "static/tmp/"

# Create the scratch directory up front; exist_ok makes this a no-op on restart.
os.makedirs(TEMP_DIR, exist_ok=True)
|
|
|
|
def render_structure(structure_str, summary, entity_color_dict, add_label=True):
    """Create a py3Dmol viewer holding one structure with the default styling.

    Args:
        structure_str: Structure text passed to ``view.addModel``.
        summary: Entity summary forwarded to ``set_default_styles``.
        entity_color_dict: Entity-to-color mapping forwarded to
            ``set_default_styles``.
        add_label: Forwarded unchanged to ``set_default_styles``.

    Returns:
        The ``py3Dmol.view`` after the model was added, styled, and
        ``zoomTo()`` was called.
    """
    # The 233 px size acts as a marker: render_html searches for exactly this
    # width/height in the generated markup and swaps in the final layout.
    viewer = py3Dmol.view(height=233, width=233)
    viewer.addModel(structure_str)
    set_default_styles(viewer, summary, entity_color_dict, add_label=add_label)
    viewer.zoomTo()
    return viewer
|
|
|
|
def render_html(view, entity_color_dict):
    """Wrap a py3Dmol view and a color legend into an embeddable ``<iframe>``.

    The viewer markup produced by ``view._make_html()`` has its 233 px marker
    size replaced with the final layout, a legend is built from
    ``entity_color_dict``, and both are placed in a small HTML document that
    is embedded through the iframe's ``srcdoc`` attribute.

    Side effect: the same document (unescaped, so it renders when opened
    directly) is written to ``TEMP_DIR/structure_view.html``. The file name is
    fixed, so every call overwrites the previous page.

    Args:
        view: Object exposing ``_make_html()`` (a ``py3Dmol.view``).
        entity_color_dict: Mapping of legend label -> CSS color.

    Returns:
        The ``<iframe ...>`` markup as a string.
    """
    viewer_markup = (
        view._make_html()
        .replace("height: 233px;", "height: 700px; max-height: 100%;")
        .replace("width: 233px;", "width: 100%;")
    )

    # Labels and colors originate from the loaded structure, so they are
    # escaped individually; otherwise they would turn into live markup once
    # the browser decodes the srcdoc attribute.
    legend_items = "".join(
        '<div style="display:inline-block; margin: 4px;">'
        '<span style="display:inline-block; width: 12px; height: 12px; '
        f'background:{html.escape(str(color))}; margin-right:5px; '
        'border:1px solid #333;"></span>'
        f"<span>{html.escape(str(label))}</span></div>"
        for label, color in entity_color_dict.items()
    )
    legend_html = (
        f'<div style="margin-top:20px; text-align:center;">{legend_items}</div>'
    )

    document = "<!DOCTYPE html><html><body><center>{}</center>{}</body></html>"

    # srcdoc is an attribute value: the browser un-escapes it exactly once, so
    # the embedded copy must be entity-escaped (quotes included, because the
    # attribute below is delimited by single quotes).
    srcdoc_content = document.format(
        html.escape(viewer_markup), html.escape(legend_html)
    )
    # The standalone file is parsed as HTML directly and must NOT be escaped,
    # or the viewer markup would be displayed as plain text.
    standalone_page = document.format(viewer_markup, legend_html)

    html_framework = (
        '<iframe style="width: 100%; height: 800px;" name="result"\n'
        'allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;"\n'
        'sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups\n'
        'allow-top-navigation-by-user-activation allow-downloads"\n'
        'allowfullscreen="" allowpaymentrequest="" frameborder="0"\n'
        f"srcdoc='{srcdoc_content}'></iframe>"
    )

    # Explicit UTF-8 so non-ASCII legend labels do not depend on the locale.
    page_path = os.path.join(TEMP_DIR, "structure_view.html")
    with open(page_path, "w", encoding="utf-8") as f:
        f.write(standalone_page)
    return html_framework
|
|
def analyze_contacts(selected_str, cutoff, structure_cache):
    """Highlight contact residues between the selected entities.

    Args:
        selected_str: Collection of selected entity keys (despite the name it
            is only used via ``len()`` and passed on as the key list).
        cutoff: Distance cutoff forwarded to ``extract_contact_residues``.
        structure_cache: Dict with the keys ``"summary"``,
            ``"structure_str"`` and ``"entity_color_dict"``.

    Returns:
        ``(html, report)``. With fewer than two selected entities the first
        item is a no-op ``gr.update()`` and the second is an error message;
        otherwise the first is the re-rendered viewer markup and the second
        maps each result name to a list of ``resn + resi`` strings.
    """
    entity_keys = selected_str

    # A contact needs two partners; leave the current view untouched otherwise.
    if len(entity_keys) < 2:
        return gr.update(), "<b style='color:red'>请至少选择两个实体进行分析</b>"

    summary = structure_cache["summary"]
    structure_text = structure_cache["structure_str"]
    entity_colors = structure_cache["entity_color_dict"]

    contacts = extract_contact_residues(summary, entity_keys, cutoff)

    # Render without labels, then highlight each group of contact residues.
    viewer = render_structure(structure_text, summary, entity_colors, add_label=False)
    for entity_name, residues in contacts.items():
        highlight_residues(viewer, residues, name=entity_name)
    rendered = render_html(viewer, entity_colors)

    report = {}
    for entity_name, residues in contacts.items():
        report[entity_name] = [res['resn'] + str(res['resi']) for res in residues]

    return rendered, report
|
|
def load_structure(file_path):
    """Read a structure file and prepare everything the UI needs to show it.

    Args:
        file_path: Path handed to ``read_file``.

    Returns:
        A 3-tuple of the viewer iframe markup, a ``gr.Dropdown`` whose choices
        are the keys of the parsed summary (nothing preselected), and the
        structure dict returned by ``read_file``.
    """
    parsed = read_file(file_path)
    structure_str, summary, entity_color_dict, structure_dict = parsed

    viewer = render_structure(structure_str, summary, entity_color_dict)
    viewer_html = render_html(viewer, entity_color_dict)

    entity_dropdown = gr.Dropdown(
        label="选择实体",
        choices=list(summary.keys()),
        interactive=True,
        value=[],
    )
    return viewer_html, entity_dropdown, structure_dict
|
|
def update_selected(selected, current):
    """Append ``selected`` to a ``"; "``-separated selection string.

    Membership is checked per entry, not per substring, so adding ``"A"`` to
    ``"AB; C"`` yields ``"AB; C; A"`` instead of being mistaken for a
    duplicate.

    Args:
        selected: Entry to add. Empty/``None`` leaves the selection unchanged.
        current: Existing selection string; ``None`` is treated as empty.

    Returns:
        The updated selection string (unchanged if the entry already exists).
    """
    current = current or ""
    if not selected:
        return current

    entries = [entry for entry in current.split("; ") if entry]
    if selected in entries:
        return current
    return f"{current}; {selected}" if current else selected
|
|
def delete_selected(selected, current):
    """Remove every entry equal to ``selected`` from a ``"; "``-separated string.

    Args:
        selected: Entry to drop.
        current: Selection string whose entries are separated by ``"; "``.

    Returns:
        The remaining entries joined with ``"; "`` again.
    """
    remaining = filter(lambda entry: entry != selected, current.split("; "))
    return "; ".join(remaining)
|
|
def clear_selected():
    """Return the empty selection string used to reset the selection."""
    empty_selection = ""
    return empty_selection
|
|
def handle_file_upload(file):
    """Load the uploaded structure file, or leave the UI untouched.

    Args:
        file: Upload value from the file component; its ``name`` attribute is
            used as the path. A falsy value means nothing was uploaded (or the
            upload was cleared).

    Returns:
        The 3-tuple from ``load_structure`` for a real upload, otherwise three
        no-op ``gr.update()`` objects so all outputs keep their current value.
    """
    if not file:
        return gr.update(), gr.update(), gr.update()
    return load_structure(file.name)
|
|
def handle_pdb_id_input(pdb_id):
    """Download a structure by PDB ID and load it into the viewer.

    The file is fetched into a temporary directory that is removed again once
    the structure has been loaded.

    Args:
        pdb_id: PDB identifier typed by the user; surrounding whitespace is
            stripped and the ID is lower-cased before use.

    Returns:
        The 3-tuple from ``load_structure`` on success. On any failure, a red
        HTML error message plus two no-op ``gr.update()`` objects, so the
        entity selector and the cached structure stay as they were.
    """
    try:
        pdb_id = pdb_id.strip().lower()
        pdbl = PDBList()

        with TemporaryDirectory() as temp_dir:
            pdbl.retrieve_pdb_file(pdb_id, pdir=temp_dir, file_format='pdb')
            pdb_file_path = os.path.join(temp_dir, f"pdb{pdb_id}.ent")
            # Must run while temp_dir still exists: the file is parsed here.
            return load_structure(pdb_file_path)
    except Exception as e:
        # UI boundary: report every failure to the user instead of crashing.
        # Both values are escaped because the message is rendered as HTML:
        # the ID is raw user input, and exception texts such as
        # "<urlopen error ...>" would otherwise be swallowed as an unknown tag.
        safe_id = html.escape(str(pdb_id))
        safe_reason = html.escape(str(e))
        error_message = f"<b style='color:red'>获取PDB ID {safe_id} 失败 {safe_reason}</b>"
        return error_message, gr.update(), gr.update()
|
|
def render_cache(structure_cache):
    """Re-render the cached structure with the default styling.

    Args:
        structure_cache: Dict with the keys ``"summary"``,
            ``"structure_str"`` and ``"entity_color_dict"``.

    Returns:
        The viewer iframe markup produced by ``render_html``.
    """
    summary = structure_cache["summary"]
    structure_text = structure_cache["structure_str"]
    entity_colors = structure_cache["entity_color_dict"]

    viewer = render_structure(structure_text, summary, entity_colors)
    return render_html(viewer, entity_colors)
|
|
| |
def multi_uniprot(uniprot_id, pdb_num):
    """Download a random sample of PDB entries for a UniProt ID and zip them.

    Args:
        uniprot_id: UniProt accession typed by the user (whitespace stripped).
        pdb_num: Upper bound on the number of PDB entries to sample; coerced
            to ``int`` because UI sliders may deliver floats.

    Returns:
        ``(zip_file_path, sequence, pdb_ids)`` where ``zip_file_path`` points
        to the archive written under ``TEMP_DIR``, ``sequence`` comes from
        ``get_uniprot_info`` and ``pdb_ids`` lists the entries that were
        actually added to the archive.
    """
    uniprot_id = uniprot_id.strip()
    print(f"Fetching structures for UniProt ID: {uniprot_id} with limit {pdb_num}")
    sequence, pdb_list = get_uniprot_info(uniprot_id)

    # random.sample needs a sequence and an integer sample size.
    candidates = list(pdb_list or [])
    sample_size = max(0, min(int(pdb_num), len(candidates)))
    selected_pdb_ids = random.sample(candidates, sample_size)
    pdbl = PDBList()
    print("Zipping PDB files")

    # The ID is user input that ends up in a file name: keep only characters
    # that cannot change the target directory.
    safe_id = "".join(ch for ch in uniprot_id if ch.isalnum() or ch in "-_") or "uniprot"
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_file_path = os.path.join(TEMP_DIR, f"{safe_id}_structures_{timestamp}.zip")

    archived_ids = []
    with TemporaryDirectory() as group_path:
        for pdb_id in selected_pdb_ids:
            pdbl.retrieve_pdb_file(pdb_id, pdir=group_path, file_format='pdb')

        with zipfile.ZipFile(zip_file_path, 'w') as z:
            for pdb_id in selected_pdb_ids:
                pdb_file_path = os.path.join(group_path, f"pdb{pdb_id.lower()}.ent")
                # Not every entry has a downloadable PDB-format file; skip the
                # ones that did not arrive instead of failing the whole batch.
                if not os.path.isfile(pdb_file_path):
                    print(f"Skipping {pdb_id}: no PDB-format file was downloaded")
                    continue
                z.write(pdb_file_path, arcname=os.path.basename(pdb_file_path))
                archived_ids.append(pdb_id)

    return zip_file_path, sequence, archived_ids
|
|
def multi_zip(zip_file, seq_input,
              multi_cutoff, identity_threshold,
              target_entity_keys=None,
              cnt_by_file=True):
    """Analyze a zip archive of structures against a target sequence.

    Args:
        zip_file: Either a path to a zip file (``str``; the file is deleted
            after extraction) or the raw archive bytes. ``None`` is rejected.
        seq_input: Target sequence text, normalized through ``sequence_fix``.
        multi_cutoff: Distance cutoff forwarded to ``analyze_group``.
        identity_threshold: Sequence identity threshold in percent; passed to
            ``analyze_group`` as a 0-1 ratio.
        target_entity_keys: Forwarded to ``analyze_group``.
        cnt_by_file: Forwarded to ``analyze_group``.

    Returns:
        ``(logo_html, result)`` on success. For a missing or unreadable
        archive, or an invalid sequence: a ``gr.update`` carrying a red error
        message, and ``None``.
    """
    invalid_zip_message = "<b style='color:red'>无效的ZIP文件</b>"
    if zip_file is None:
        return gr.update(value=invalid_zip_message), None

    # Everything that reads the extracted files stays inside this scope; the
    # directory is deleted as soon as the ``with`` block exits.
    with TemporaryDirectory() as group_path:
        try:
            if isinstance(zip_file, str):
                with zipfile.ZipFile(zip_file, 'r') as z:
                    z.extractall(group_path)
                # Path inputs are consumed: remove the archive once extracted.
                os.remove(zip_file)
            else:
                with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as z:
                    z.extractall(group_path)
        except zipfile.BadZipFile:
            # A corrupt or non-zip upload is reported like a missing one.
            return gr.update(value=invalid_zip_message), None

        seq_fixed = sequence_fix(seq_input)
        if not seq_fixed:
            return gr.update(value="<b style='color:red'>无效的序列格式,请输入有效的FASTA或纯氨基酸序列</b>"), None

        result = analyze_group(
            group_path,
            seq_fixed,
            cutoff=multi_cutoff,
            match_ratio=identity_threshold / 100,
            target_entity_keys=target_entity_keys,
            cnt_by_file=cnt_by_file,
        )
        svg_html = logo_plot(seq_fixed, result)

    return svg_html, result
|
|
# NOTE(review): the layout nesting below was reconstructed from the order of
# the statements — confirm it against the intended page layout.
with gr.Blocks() as demo:
    # Static page header and introduction, read from bundled HTML snippets.
    gr.HTML(get_text_content("static/gr_head.html"))
    gr.HTML(get_text_content("static/gr_info.html"))

    # State holders. structure_cache keeps what is needed to re-render the
    # currently loaded structure; multi_result_cache receives the result of
    # the multi-structure analysis; zip_cache is not wired to any event here.
    structure_cache = gr.State(
        value={"structure_str": None, "summary": None, "entity_color_dict": None}
    )
    multi_result_cache = gr.State(value=None)
    zip_cache = gr.State(value=None)

    # ---- Tab 1: inspect a single structure --------------------------------
    with gr.Tab("Single Structure"):
        output = gr.HTML()
        with gr.Row():
            with gr.Column(scale=1):
                pdb_input = gr.Textbox(
                    label="输入 PDB ID 获取结构",
                    placeholder="Input PDB ID",
                    interactive=True,
                )
                pdb_btn = gr.Button("获取结构")

                file_input = gr.File(
                    label="或直接上传 PDB 文件",
                    file_types=[".pdb", ".cif", ".ent"],
                )
            with gr.Column(scale=2):
                with gr.Row():
                    entity_selector = gr.Dropdown(
                        choices=[],
                        interactive=True,
                        multiselect=True,
                        label="选择实体",
                        scale=2,
                    )
                    cutoff_slider = gr.Slider(
                        1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)", scale=1
                    )

                run_btn = gr.Button("分析并渲染", variant="primary")
                cln_btn = gr.Button("还原模型")

    # ---- Tab 2: aggregate contacts over many structures -------------------
    with gr.Tab("Multi Structure"):
        multi_logo = gr.HTML(MULTI_HTML_HOLDER)
        with gr.Row():
            with gr.Column():
                with gr.Tab("从 UniProt 获取"):
                    uniprot_input = gr.Textbox(
                        label="输入 UniProt ID 获取结构",
                        placeholder="Input UniProt ID",
                        interactive=True,
                        scale=2,
                    )
                    with gr.Row():
                        pdb_num_slider = gr.Slider(
                            1,
                            100,
                            value=10,
                            step=1,
                            label="获取 PDB 数量上限(按设定数量随机采样)",
                            interactive=True,
                            scale=2,
                        )
                        uniprot_btn = gr.Button("抓取蛋白数据", variant="primary", scale=1)

                with gr.Tab("手动上传结构压缩文件"):
                    zip_input = gr.File(
                        label="上传包含 .pdb/.ent/.cif 的 zip 压缩文件",
                        file_types=[".zip"],
                        type="binary",
                        scale=1,
                    )
                seq_input = gr.Textbox(
                    label="目标蛋白质序列",
                    placeholder="上传文件时需手动输入 FASTA 格式序列或纯氨基酸序列...",
                    lines=8,
                    scale=3,
                )

            with gr.Group():
                mult_target_selector = gr.Dropdown(
                    value=['Ligand'],
                    choices=['Ligand', 'Protein', 'DNA', 'RNA', 'Ion'],
                    label="选择互作对象类型(可多选,无选择则统计全部)",
                    multiselect=True,
                    interactive=True,
                )
                with gr.Row():
                    multi_cutoff_slider = gr.Slider(
                        1,
                        10,
                        value=3.5,
                        step=0.5,
                        label="Cutoff 距离 (Å)",
                        interactive=True,
                        scale=3,
                    )
                    cnt_checkbox = gr.Checkbox(
                        label="Yes",
                        info="单文件内不重复统计位点",
                        value=True,
                        interactive=True,
                    )

                identity_threshold = gr.Slider(
                    0, 100, value=80, step=5, label="序列一致性阈值 (%)", interactive=True
                )
                multi_run_btn = gr.Button("开始分析", variant="primary")

    # Read-only text box that receives the contact report of analyze_contacts
    # and the PDB ID list returned by multi_uniprot.
    debug_text = gr.Textbox(label="调试信息", interactive=False)

    # ---- Event wiring: single structure -----------------------------------
    # Every way of loading a structure refreshes the same three targets.
    _structure_outputs = [output, entity_selector, structure_cache]

    run_btn.click(
        fn=analyze_contacts,
        inputs=[entity_selector, cutoff_slider, structure_cache],
        outputs=[output, debug_text],
    )

    cln_btn.click(
        fn=render_cache,
        inputs=[structure_cache],
        outputs=[output],
    )

    file_input.change(
        fn=handle_file_upload,
        inputs=file_input,
        outputs=_structure_outputs,
    )

    pdb_btn.click(
        fn=handle_pdb_id_input,
        inputs=pdb_input,
        outputs=_structure_outputs,
    )

    # Show the bundled example structure as soon as the page is opened.
    demo.load(
        fn=lambda: load_structure(default_file),
        inputs=[],
        outputs=_structure_outputs,
    )

    # ---- Event wiring: multi structure ------------------------------------
    multi_run_btn.click(
        fn=multi_zip,
        inputs=[
            zip_input,
            seq_input,
            multi_cutoff_slider,
            identity_threshold,
            mult_target_selector,
            cnt_checkbox,
        ],
        outputs=[multi_logo, multi_result_cache],
    )
    # Fetching by UniProt ID fills the upload slot and the sequence box, so
    # the analysis button then works exactly as for a manual upload.
    uniprot_btn.click(
        fn=multi_uniprot,
        inputs=[uniprot_input, pdb_num_slider],
        outputs=[zip_input, seq_input, debug_text],
    )


demo.launch()