# app.py · hemantn/ablang2 at main
#
# File size: 12,857 Bytes
#
# 712d350

import gradio as gr
import sys
import os
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file

# Hugging Face Hub repository used for the model, the tokenizer and adapter.py
_REPO_ID = "hemantn/ablang2"

# Download (or reuse from cache) the model and tokenizer.
# trust_remote_code=True allows transformers to execute code bundled with the repo.
model = AutoModel.from_pretrained(_REPO_ID, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(_REPO_ID, trust_remote_code=True)

# Resolve the local path of adapter.py and put its directory first on sys.path
adapter_path = cached_file(_REPO_ID, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# This import relies on the sys.path entry added just above
from adapter import AbLang2PairedHuggingFaceAdapter

# Adapter instance called by restore_sequences() to run the restoration
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)

def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    Every return path yields exactly two strings, one per output component
    this callback is wired to. On failure the first string carries the
    message and the second is empty.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*)
        light_chain (str): Light chain sequence with masked residues (*)
        use_align (bool): Whether to use alignment for variable missing lengths

    Returns:
        tuple: (heavy_html, light_html) on success, (error_message, "") otherwise
    """
    try:
        # Check if alignment is requested but not available
        if use_align:
            try:
                import anarci  # noqa: F401 - imported only to probe availability
            except ImportError:
                return "Alignment feature requires 'anarci' package which is not available. Please disable alignment option.", ""

        # Normalise inputs; a missing value is treated like an empty chain
        heavy = (heavy_chain or "").strip()
        light = (light_chain or "").strip()
        if not heavy and not light:
            return "Please provide at least one antibody chain sequence.", ""

        # An absent chain is passed to the model as an empty string
        sequences = [[heavy, light]]

        # Perform restoration
        restored = ablang(sequences, mode='restore', align=use_align)

        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return "Error: No restoration result obtained.", ""

        result = restored[0]  # Get the first (and only) result

        # Parse the result to separate heavy and light chains
        if '>|<' in result:
            # Both chains present
            parts = result.split('>|<')
            heavy_part = parts[0].replace('<', '').replace('>', '')
            light_part = parts[1].replace('<', '').replace('>', '')
        elif result.startswith('<') and result.endswith('>'):
            # Only one chain present: attribute it to whichever input was given
            single = result.replace('<', '').replace('>', '')
            if heavy:
                heavy_part, light_part = single, ""
            else:
                heavy_part, light_part = "", single
        else:
            return "Error: Unexpected result format.", ""

        # Create highlighted versions
        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)

        # Create HTML outputs with proper styling - no scroll, wrap text
        box_open = '<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">'
        heavy_html = f'{box_open}{highlighted_heavy}</div>'
        light_html = f'{box_open}{highlighted_light}</div>'

        return heavy_html, light_html

    except Exception as e:
        # Top-level UI boundary: report the failure in the output instead of raising
        return f"Error during restoration: {str(e)}", ""

def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    Positions are compared pairwise: where the original has the mask
    character (*) and the restored sequence has something else, the restored
    character is wrapped in a ``restored-highlight`` span. Every emitted
    character is HTML-escaped because the result is embedded in markup.

    Args:
        original_seq (str): Input sequence, with * marking masked positions
        restored_seq (str): Sequence returned by the restoration step

    Returns:
        str: HTML fragment; restored_seq unchanged when it is empty/None
    """
    from html import escape

    if not restored_seq:
        return restored_seq
    if not original_seq:
        # Nothing to compare against, so nothing is highlighted
        return escape(restored_seq)

    pieces = []
    for orig_char, rest_char in zip(original_seq, restored_seq):
        safe_char = escape(rest_char)
        if orig_char == '*' and rest_char != '*':
            # This residue was restored
            pieces.append(f'<span class="restored-highlight">{safe_char}</span>')
        else:
            pieces.append(safe_char)

    # Add any remaining characters from restored sequence (empty slice if none)
    pieces.append(escape(restored_seq[len(original_seq):]))

    return "".join(pieces)

# Create Gradio interface
# Create Gradio interface
# The Blocks context is bound to `demo`; the css string below is passed to
# gr.Blocks as custom styling (class names such as "restored-highlight" and
# "restored-sequence-box" are the ones emitted by the restoration helpers).
with gr.Blocks(title="AbLang2 Sequence Restorer", theme=gr.themes.Soft(), css="""
    * {
        font-family: 'Courier New', monospace !important;
    }
    .sequence-input, .sequence-output {
        font-family: 'Courier New', monospace !important;
        font-size: 14px !important;
        letter-spacing: 0.5px !important;
    }
    .restored-highlight {
        background-color: #90EE90 !important;
        color: #000 !important;
        font-weight: bold !important;
    }
    .examples {
        font-family: 'Courier New', monospace !important;
        font-size: 14px !important;
        letter-spacing: 0.5px !important;
    }
    .restored-sequence-box {
        font-family: 'Courier New', monospace !important;
        font-size: 14px !important;
        letter-spacing: 0.5px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
    }
    .restored-heading {
        color: #2E8B57 !important;
        font-weight: bold !important;
        font-size: 18px !important;
    }
    .example-text {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
    }
    .examples-table {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        max-width: none !important;
        overflow: visible !important;
    }
    .examples-table td {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        max-width: none !important;
        overflow: visible !important;
        text-overflow: unset !important;
    }
    .sequence-output label {
        font-weight: bold !important;
        color: #495057 !important;
        font-size: 14px !important;
        margin-bottom: 5px !important;
    }
    /* Force full display of examples */
    .examples-container {
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
    }
    .examples-container table {
        width: 100% !important;
        table-layout: auto !important;
    }
    .examples-container td {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        padding: 8px !important;
        vertical-align: top !important;
    }
    .examples-container th {
        white-space: nowrap !important;
        padding: 8px !important;
    }
    /* Override any Gradio default truncation */
    .examples table td {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
    }
    .examples table {
        table-layout: auto !important;
        width: 100% !important;
    }
    /* Target the specific examples component */
    div[data-testid="examples"] table td {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
    }
    /* Force examples to show full content */
    .examples table, .examples table td, .examples table th {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
        font-family: 'Courier New', monospace !important;
        font-size: 12px !important;
        table-layout: auto !important;
        width: auto !important;
        min-width: 100% !important;
    }
    /* Override any inline styles */
    .examples * {
        white-space: pre-wrap !important;
        word-wrap: break-word !important;
        overflow-wrap: break-word !important;
        max-width: none !important;
        text-overflow: unset !important;
        overflow: visible !important;
    }
    /* Style output labels to match input labels exactly */
    .output-label {
        font-weight: 600 !important;
        color: var(--label-text-color) !important;
        font-size: 14px !important;
        margin-bottom: 8px !important;
        margin-top: 16px !important;
        line-height: 1.4 !important;
        display: block !important;
    }
""") as demo:
    # Page title and usage instructions
    gr.Markdown("""
    # 🧬 AbLang2 Sequence Restorer
    
    This app uses the AbLang2 model to restore masked residues (*) in antibody sequences. 
    You can provide either one or both heavy and light chain sequences.
    
    **Instructions:**
    - Use `*` to mask residues you want to restore
    - Provide heavy chain, light chain, or both
    - Enable "Use Alignment" for variable missing lengths
    """)
    
    with gr.Row():
        # First column: the two sequence inputs, the alignment toggle and the trigger button
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"]
            )
            
            light_input = gr.Textbox(
                label="Light Chain Sequence", 
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"]
            )
            
            # Passed to restore_sequences as its use_align argument (off by default)
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths) - Requires anarci package",
                value=False
            )
            
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")
        
        # Second column: headings plus one HTML component per chain for the results
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            
            # The HTML components get an empty label; a Markdown line styled with
            # "output-label" serves as the visible heading instead
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")
    
    # Example sequences
    # Each row is [heavy, light]: both chains, heavy only, light only.
    # The alignment checkbox is not among the example inputs.
    gr.Examples(
        examples=[
            [
                "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
                "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK"
            ],
            [
                "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
                ""
            ],
            [
                "",
                "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK"
            ]
        ],
        inputs=[heavy_input, light_input],
        label="Example Sequences"
    )
    
    # Connect the button to the function
    # Three inputs map to (heavy_chain, light_chain, use_align); two output
    # components are listed, so the callback has to supply two values.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output]
    )
    
    # Footer note shown below the interface
    gr.Markdown("""
    ---
    **Note:** This app uses the AbLang2 model from Hugging Face Hub. 
    The restoration process may take a few seconds depending on sequence length and complexity.
    """)

# Launch the app only when this file is run as a script, not when it is imported
if __name__ == "__main__":
    demo.launch()