|
import gradio as gr |
|
import sys |
|
import os |
|
from transformers import AutoModel, AutoTokenizer |
|
from transformers.utils import cached_file |
|
|
|
|
|
# Hugging Face Hub repository that supplies the weights, the tokenizer and adapter.py.
_ABLANG2_REPO = "hemantn/ablang2"

# trust_remote_code=True allows transformers to execute the model and
# tokenizer classes that ship inside the repository itself.
model = AutoModel.from_pretrained(_ABLANG2_REPO, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(_ABLANG2_REPO, trust_remote_code=True)

# adapter.py is not an installed package: locate the cached copy and put its
# directory at the front of sys.path so the import below can find it.
adapter_path = cached_file(_ABLANG2_REPO, "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)

# Must come after the sys.path change above, hence the late import.
from adapter import AbLang2PairedHuggingFaceAdapter  # noqa: E402

# Callable wrapper used by restore_sequences() to run the model.
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
|
|
|
def _normalize_chain(sequence):
    """Return *sequence* with all whitespace removed; ``None`` becomes ``""``."""
    # Pasted sequences are frequently line-wrapped; whitespace is never a residue.
    return "".join((sequence or "").split())


def _failure(message):
    """Pair *message* with an empty second value so both outputs are filled."""
    return message, ""


def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences and render them as HTML.

    Every exit path returns exactly two strings, matching the two output
    components this callback is wired to (heavy first, light second).
    Failures are reported as a message in the first slot with an empty
    second slot.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*)
        light_chain (str): Light chain sequence with masked residues (*)
        use_align (bool): Whether to use alignment for variable missing lengths

    Returns:
        tuple: (heavy_html, light_html) on success, or (error_message, "")
        when the input is empty, alignment is unavailable, the model output
        is not in a recognised format, or an exception is raised.
    """
    import html

    try:
        if use_align:
            try:
                # Availability probe only; the module itself is not used here.
                import anarci  # noqa: F401
            except ImportError:
                return _failure(
                    "Alignment feature requires 'anarci' package which is not available. Please disable alignment option."
                )

        heavy = _normalize_chain(heavy_chain)
        light = _normalize_chain(light_chain)
        if not heavy and not light:
            return _failure("Please provide at least one antibody chain sequence.")

        # One [heavy, light] pair is submitted; a chain that was not provided is "".
        sequences = [[heavy, light]]
        restored = ablang(sequences, mode='restore', align=use_align)

        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return _failure("Error: No restoration result obtained.")
        result = restored[0]

        if '>|<' in result:
            # Paired output: "<heavy>|<light>".
            pieces = result.split('>|<')
            heavy_part = pieces[0].replace('<', '').replace('>', '')
            light_part = pieces[1].replace('<', '').replace('>', '')
        elif result.startswith('<') and result.endswith('>'):
            # Single-chain output: attribute it to whichever chain was supplied.
            bare = result.replace('<', '').replace('>', '')
            if heavy:
                heavy_part, light_part = bare, ""
            else:
                heavy_part, light_part = "", bare
        else:
            return _failure("Error: Unexpected result format.")

        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)

        heavy_html = f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{highlighted_heavy}</div>'
        light_html = f'<div class="restored-sequence-box" style="padding: 10px; background-color: #f8f9fa; border: 1px solid #dee2e6; border-radius: 4px;">{highlighted_light}</div>'

        return heavy_html, light_html

    except Exception as e:
        # The message is rendered by an HTML component, so escape any markup
        # characters the exception text may contain.
        return _failure(f"Error during restoration: {html.escape(str(e))}")
|
|
|
def highlight_restored_residues(original_seq, restored_seq):
    """
    Mark the residues that replaced a '*' mask.

    Positions are compared pairwise. Wherever the original holds '*' and the
    restored sequence holds anything else, the restored character is wrapped
    in a ``<span class="restored-highlight">``. All other characters are
    copied through unchanged. If either argument is empty, ``restored_seq``
    is returned as is.
    """
    if not (original_seq and restored_seq):
        return restored_seq

    pieces = [
        f'<span class="restored-highlight">{new}</span>'
        if old == '*' and new != '*'
        else new
        for old, new in zip(original_seq, restored_seq)
    ]
    # Anything past the original's length has no mask to compare against;
    # the slice is empty when the restored sequence is not longer.
    pieces.append(restored_seq[len(original_seq):])
    return "".join(pieces)
|
|
|
|
|
# Stylesheet handed to gr.Blocks: monospace sequence display, the highlight
# class emitted by highlight_restored_residues(), and overrides for the
# examples table.
_CUSTOM_CSS = """
* {
    font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-highlight {
    background-color: #90EE90 !important;
    color: #000 !important;
    font-weight: bold !important;
}
.examples {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
}
.restored-sequence-box {
    font-family: 'Courier New', monospace !important;
    font-size: 14px !important;
    letter-spacing: 0.5px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
}
.restored-heading {
    color: #2E8B57 !important;
    font-weight: bold !important;
    font-size: 18px !important;
}
.example-text {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
}
.examples-table {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
}
.examples-table td {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    max-width: none !important;
    overflow: visible !important;
    text-overflow: unset !important;
}
.sequence-output label {
    font-weight: bold !important;
    color: #495057 !important;
    font-size: 14px !important;
    margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples-container table {
    width: 100% !important;
    table-layout: auto !important;
}
.examples-container td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    padding: 8px !important;
    vertical-align: top !important;
}
.examples-container th {
    white-space: nowrap !important;
    padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
.examples table {
    table-layout: auto !important;
    width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
    font-family: 'Courier New', monospace !important;
    font-size: 12px !important;
    table-layout: auto !important;
    width: auto !important;
    min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
    white-space: pre-wrap !important;
    word-wrap: break-word !important;
    overflow-wrap: break-word !important;
    max-width: none !important;
    text-overflow: unset !important;
    overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
    font-weight: 600 !important;
    color: var(--label-text-color) !important;
    font-size: 14px !important;
    margin-bottom: 8px !important;
    margin-top: 16px !important;
    line-height: 1.4 !important;
    display: block !important;
}
"""

# Markdown shown at the top of the page.
_INTRO_MARKDOWN = """
# 🧬 AbLang2 Sequence Restorer

This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
You can provide either one or both heavy and light chain sequences.

**Instructions:**
- Use `*` to mask residues you want to restore
- Provide heavy chain, light chain, or both
- Enable "Use Alignment" for variable missing lengths
"""

# Markdown shown at the bottom of the page.
_FOOTER_MARKDOWN = """
---
**Note:** This app uses the AbLang2 model from Hugging Face Hub.
The restoration process may take a few seconds depending on sequence length and complexity.
"""

# Example rows as [heavy, light]: a paired input, heavy only, and light only.
_EXAMPLE_PAIRS = [
    [
        "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
        "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK",
    ],
    [
        "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
        "",
    ],
    [
        "",
        "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK",
    ],
]

with gr.Blocks(
    title="AbLang2 Sequence Restorer",
    theme=gr.themes.Soft(),
    css=_CUSTOM_CSS,
) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: the inputs and the trigger button.
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"],
            )
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths) - Requires anarci package",
                value=False,
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")

        # Right column: one HTML panel per chain, each under a Markdown label.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")

            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")

            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")

    # The examples fill only the two sequence boxes.
    gr.Examples(
        examples=_EXAMPLE_PAIRS,
        inputs=[heavy_input, light_input],
        label="Example Sequences",
    )

    # restore_sequences receives (heavy, light, use_align) and supplies one
    # value for each of the two HTML outputs.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output],
    )

    gr.Markdown(_FOOTER_MARKDOWN)


# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()