# ablang2 / app.py
# hemantn's picture
# Commit 712d350: Integrate utility files into main repository - make self-contained
import gradio as gr
import sys
import os
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file
# Hub repository id shared by every download below.
_HUB_REPO = "hemantn/ablang2"

# Pull the model and the tokenizer from the Hugging Face Hub.
model = AutoModel.from_pretrained(_HUB_REPO, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(_HUB_REPO, trust_remote_code=True)

# Resolve the local cache path of the repo's adapter.py and put its directory
# at the front of sys.path so that `adapter` can be imported as a module.
adapter_path = cached_file(_HUB_REPO, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# This import must stay below the sys.path change above.
from adapter import AbLang2PairedHuggingFaceAdapter  # noqa: E402

# Callable wrapper used by the restoration callback.
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*)
        light_chain (str): Light chain sequence with masked residues (*)
        use_align (bool): Whether to use alignment for variable missing lengths

    Returns:
        tuple: (heavy_html, light_html). Always exactly two strings, one per
        output component wired to this callback. On failure the first
        element carries the error message and the second is empty.
    """
    # Local import: only needed to escape exception text before it is
    # rendered by an HTML output component.
    import html

    def _failure(message):
        # Every exit path must yield exactly two values, matching the two
        # outputs this callback is connected to.
        return message, ""

    def _strip_markers(chunk):
        # Drop the '<' / '>' delimiters that surround each restored chain.
        return chunk.replace('<', '').replace('>', '')

    def _sequence_box(inner_html):
        # Wrap a (possibly highlighted) sequence in the styled output box.
        return f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{inner_html}</div>'

    try:
        # Check if alignment is requested but not available
        if use_align:
            try:
                import anarci  # noqa: F401  (availability probe only)
            except ImportError:
                return _failure(
                    "Alignment feature requires 'anarci' package which is not available. Please disable alignment option."
                )

        # Treat a missing value (None) the same as an empty textbox.
        heavy = (heavy_chain or "").strip()
        light = (light_chain or "").strip()

        if not heavy and not light:
            return _failure("Please provide at least one antibody chain sequence.")

        # An absent chain is passed as an empty string in its slot.
        sequences = [[heavy, light]]

        # Perform restoration
        restored = ablang(sequences, mode='restore', align=use_align)

        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return _failure("Error: No restoration result obtained.")

        result = restored[0]  # Single input pair -> single result

        # Parse the result to separate heavy and light chains
        if '>|<' in result:
            # Both chains present
            pieces = result.split('>|<')
            heavy_part = _strip_markers(pieces[0])
            light_part = _strip_markers(pieces[1])
        elif result.startswith('<') and result.endswith('>'):
            # Only one chain present: attribute it to whichever input was given
            if heavy:
                heavy_part, light_part = _strip_markers(result), ""
            else:
                heavy_part, light_part = "", _strip_markers(result)
        else:
            return _failure("Error: Unexpected result format.")

        # Create highlighted versions and wrap them for display
        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)
        return _sequence_box(highlighted_heavy), _sequence_box(highlighted_light)
    except Exception as e:
        # UI boundary: report the problem in the output instead of crashing.
        # The text is escaped because it is rendered as HTML.
        return _failure(f"Error during restoration: {html.escape(str(e))}")
def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    Positions are compared pairwise: wherever the original has a mask (*)
    and the restored sequence has anything other than a mask, the restored
    character is wrapped in a ``restored-highlight`` span. All other
    characters are copied through unchanged.

    Args:
        original_seq (str): Input sequence, with ``*`` marking masked residues
        restored_seq (str): Restored sequence to render

    Returns:
        str: ``restored_seq`` with highlight markup added. If either argument
        is empty, ``restored_seq`` is returned as-is.
    """
    if not original_seq or not restored_seq:
        return restored_seq

    pieces = [
        f'<span class="restored-highlight">{rest_char}</span>'
        if orig_char == '*' and rest_char != '*'
        else rest_char
        for orig_char, rest_char in zip(original_seq, restored_seq)
    ]
    # Characters of the restored sequence beyond the original's length have
    # no counterpart to compare against; append them unhighlighted (this
    # slice is empty when the restored sequence is not longer).
    pieces.append(restored_seq[len(original_seq):])
    return "".join(pieces)
# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------

# Custom stylesheet handed to gr.Blocks (kept verbatim).
_APP_CSS = """
* {
font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
}
.restored-highlight {
background-color: #90EE90 !important;
color: #000 !important;
font-weight: bold !important;
}
.examples {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
}
.restored-sequence-box {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
}
.restored-heading {
color: #2E8B57 !important;
font-weight: bold !important;
font-size: 18px !important;
}
.example-text {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.examples-table {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
max-width: none !important;
overflow: visible !important;
}
.examples-table td {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
max-width: none !important;
overflow: visible !important;
text-overflow: unset !important;
}
.sequence-output label {
font-weight: bold !important;
color: #495057 !important;
font-size: 14px !important;
margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
.examples-container table {
width: 100% !important;
table-layout: auto !important;
}
.examples-container td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
padding: 8px !important;
vertical-align: top !important;
}
.examples-container th {
white-space: nowrap !important;
padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
.examples table {
table-layout: auto !important;
width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
table-layout: auto !important;
width: auto !important;
min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
font-weight: 600 !important;
color: var(--label-text-color) !important;
font-size: 14px !important;
margin-bottom: 8px !important;
margin-top: 16px !important;
line-height: 1.4 !important;
display: block !important;
}
"""

# Markdown shown at the top of the page.
_INTRO_MD = """
# 🧬 AbLang2 Sequence Restorer
This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
You can provide either one or both heavy and light chain sequences.
**Instructions:**
- Use `*` to mask residues you want to restore
- Provide heavy chain, light chain, or both
- Enable "Use Alignment" for variable missing lengths
"""

# Markdown shown at the bottom of the page.
_FOOTER_MD = """
---
**Note:** This app uses the AbLang2 model from Hugging Face Hub.
The restoration process may take a few seconds depending on sequence length and complexity.
"""

# [heavy, light] example rows: both chains, heavy only, light only.
_EXAMPLE_PAIRS = [
    [
        "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
        "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
    ],
    [
        "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
        "",
    ],
    [
        "",
        "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
    ],
]


def _sequence_textbox(label, placeholder):
    """Build one of the two identically configured chain input boxes."""
    return gr.Textbox(
        label=label,
        placeholder=placeholder,
        lines=3,
        max_lines=5,
        elem_classes=["sequence-input"],
    )


# NOTE(review): the container nesting below (two columns inside one row, with
# the examples, click wiring and footer placed after the row) is reconstructed;
# confirm it against the intended layout.
with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=_APP_CSS,
) as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Row():
        # Input side: the two chains, the alignment toggle and the trigger.
        with gr.Column():
            heavy_input = _sequence_textbox(
                "Heavy Chain Sequence",
                "Enter heavy chain sequence with masked residues (*)...",
            )
            light_input = _sequence_textbox(
                "Light Chain Sequence",
                "Enter light chain sequence with masked residues (*)...",
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths) - Requires anarci package",
                value=False,
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        # Output side: one HTML panel per chain, each under a Markdown caption.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # Clickable example rows that fill both input boxes.
    gr.Examples(
        examples=_EXAMPLE_PAIRS,
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # Run the restoration when the button is pressed.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown(_FOOTER_MD)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()