File size: 12,857 Bytes
712d350 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
import gradio as gr
import sys
import os
from transformers import AutoModel, AutoTokenizer
from transformers.utils import cached_file
# Load model and tokenizer from Hugging Face Hub
# NOTE(review): trust_remote_code=True allows code shipped in the
# "hemantn/ablang2" repository to run locally -- confirm the repo is trusted.
model = AutoModel.from_pretrained("hemantn/ablang2", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained("hemantn/ablang2", trust_remote_code=True)
# Find the cached model directory and import adapter
# cached_file() gives the local path of the repo's adapter.py; its directory
# is put first on sys.path so the plain `from adapter import ...` below works.
adapter_path = cached_file("hemantn/ablang2", "adapter.py")
cached_model_dir = os.path.dirname(adapter_path)
sys.path.insert(0, cached_model_dir)
# Import and create the adapter
# This import must stay after the sys.path change above.
from adapter import AbLang2PairedHuggingFaceAdapter
# `ablang` is the callable used by restore_sequences() to run restoration.
ablang = AbLang2PairedHuggingFaceAdapter(model=model, tokenizer=tokenizer)
def restore_sequences(heavy_chain, light_chain, use_align=False):
    """
    Restore masked residues in antibody sequences.

    Args:
        heavy_chain (str): Heavy chain sequence with masked residues (*)
        light_chain (str): Light chain sequence with masked residues (*)
        use_align (bool): Whether to use alignment for variable missing lengths

    Returns:
        tuple: (heavy_html, light_html). Always exactly two strings, one per
        HTML output component wired to the restore button. On failure the
        first element carries the message and the second is "".
    """
    import html  # local import: only needed to escape exception text

    def failure(message):
        # Every error path must produce the same arity as the success path,
        # otherwise Gradio rejects the return value instead of showing it.
        return message, ""

    def as_box(content):
        # Wrap a (possibly highlighted) sequence in the styled output box.
        return (
            '<div class="restored-sequence-box" style="padding: 10px; '
            'background-color: #f8f9fa; border: 1px solid #dee2e6; '
            f'border-radius: 4px;">{content}</div>'
        )

    def strip_brackets(text):
        # Drop the '<' / '>' delimiters that surround each restored chain.
        return text.replace('<', '').replace('>', '')

    # Treat a missing value (None) the same as an empty textbox.
    heavy = (heavy_chain or "").strip()
    light = (light_chain or "").strip()

    try:
        # Check if alignment is requested but not available
        if use_align:
            try:
                import anarci  # noqa: F401  (availability probe only)
            except ImportError:
                return failure(
                    "Alignment feature requires 'anarci' package which is not "
                    "available. Please disable alignment option."
                )

        if not heavy and not light:
            return failure("Please provide at least one antibody chain sequence.")

        # A missing chain is passed as an empty string in its slot.
        sequences = [[heavy, light]]

        # Perform restoration
        restored = ablang(sequences, mode='restore', align=use_align)
        if not (hasattr(restored, '__len__') and len(restored) > 0):
            return failure("Error: No restoration result obtained.")

        result = restored[0]  # Get the first (and only) result

        # Parse the result to separate heavy and light chains
        if '>|<' in result:
            # Both chains present
            pieces = result.split('>|<')
            heavy_part = strip_brackets(pieces[0])
            light_part = strip_brackets(pieces[1])
        elif result.startswith('<') and result.endswith('>'):
            # Only one chain present: assign it to whichever chain was given.
            single = strip_brackets(result)
            heavy_part, light_part = (single, "") if heavy else ("", single)
        else:
            return failure("Error: Unexpected result format.")

        # Create highlighted versions and wrap them for display
        highlighted_heavy = highlight_restored_residues(heavy, heavy_part)
        highlighted_light = highlight_restored_residues(light, light_part)
        return as_box(highlighted_heavy), as_box(highlighted_light)
    except Exception as e:
        # UI boundary: surface the problem in the output panel rather than
        # crashing the handler. The text is escaped because it is rendered
        # by an HTML component.
        return failure(f"Error during restoration: {html.escape(str(e))}")
def highlight_restored_residues(original_seq, restored_seq):
    """
    Highlight restored residues in green.

    Compares the two sequences position by position. Wherever the original
    has a mask character (*) and the restored sequence has something else,
    that restored character is wrapped in a
    ``<span class="restored-highlight">`` element.

    Args:
        original_seq (str): Input sequence, with * marking masked positions.
        restored_seq (str): Sequence returned by the restoration step.

    Returns:
        str: HTML fragment for the restored sequence. If either argument is
        empty, ``restored_seq`` is returned unchanged.
    """
    if not (original_seq and restored_seq):
        return restored_seq

    pieces = [
        f'<span class="restored-highlight">{new}</span>'
        if old == '*' and new != '*'
        else new
        for old, new in zip(original_seq, restored_seq)
    ]
    # Add any remaining characters from restored sequence (the slice is
    # empty when the restored sequence is not longer than the original).
    pieces.append(restored_seq[len(original_seq):])
    return "".join(pieces)
# Create Gradio interface
# Builds the whole UI as `demo`: a title/instructions header, an input column
# (two chain textboxes, alignment checkbox, restore button), an output column
# (two HTML panels), example inputs, and a footer note.
# NOTE(review): block nesting below was reconstructed from the call structure
# (the reviewed copy had lost its indentation) -- confirm against the
# intended layout.
with gr.Blocks(title="AbLang2 Sequence Restorer", theme=gr.themes.Soft(), css="""
* {
font-family: 'Courier New', monospace !important;
}
.sequence-input, .sequence-output {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
}
.restored-highlight {
background-color: #90EE90 !important;
color: #000 !important;
font-weight: bold !important;
}
.examples {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
}
.restored-sequence-box {
font-family: 'Courier New', monospace !important;
font-size: 14px !important;
letter-spacing: 0.5px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
}
.restored-heading {
color: #2E8B57 !important;
font-weight: bold !important;
font-size: 18px !important;
}
.example-text {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
}
.examples-table {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
max-width: none !important;
overflow: visible !important;
}
.examples-table td {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
white-space: pre-wrap !important;
word-wrap: break-word !important;
max-width: none !important;
overflow: visible !important;
text-overflow: unset !important;
}
.sequence-output label {
font-weight: bold !important;
color: #495057 !important;
font-size: 14px !important;
margin-bottom: 5px !important;
}
/* Force full display of examples */
.examples-container {
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
.examples-container table {
width: 100% !important;
table-layout: auto !important;
}
.examples-container td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
padding: 8px !important;
vertical-align: top !important;
}
.examples-container th {
white-space: nowrap !important;
padding: 8px !important;
}
/* Override any Gradio default truncation */
.examples table td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
.examples table {
table-layout: auto !important;
width: 100% !important;
}
/* Target the specific examples component */
div[data-testid="examples"] table td {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
}
/* Force examples to show full content */
.examples table, .examples table td, .examples table th {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
font-family: 'Courier New', monospace !important;
font-size: 12px !important;
table-layout: auto !important;
width: auto !important;
min-width: 100% !important;
}
/* Override any inline styles */
.examples * {
white-space: pre-wrap !important;
word-wrap: break-word !important;
overflow-wrap: break-word !important;
max-width: none !important;
text-overflow: unset !important;
overflow: visible !important;
}
/* Style output labels to match input labels exactly */
.output-label {
font-weight: 600 !important;
color: var(--label-text-color) !important;
font-size: 14px !important;
margin-bottom: 8px !important;
margin-top: 16px !important;
line-height: 1.4 !important;
display: block !important;
}
""") as demo:
    # Page header and usage instructions.
    gr.Markdown("""
# 🧬 AbLang2 Sequence Restorer
This app uses the AbLang2 model to restore masked residues (*) in antibody sequences.
You can provide either one or both heavy and light chain sequences.
**Instructions:**
- Use `*` to mask residues you want to restore
- Provide heavy chain, light chain, or both
- Enable "Use Alignment" for variable missing lengths
""")
    with gr.Row():
        # Input column: the "sequence-input" class is styled by the
        # `.sequence-input` rule in the css string above.
        with gr.Column():
            heavy_input = gr.Textbox(
                label="Heavy Chain Sequence",
                placeholder="Enter heavy chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"]
            )
            light_input = gr.Textbox(
                label="Light Chain Sequence",
                placeholder="Enter light chain sequence with masked residues (*)...",
                lines=3,
                max_lines=5,
                elem_classes=["sequence-input"]
            )
            # Passed to restore_sequences as `use_align`; off by default.
            align_checkbox = gr.Checkbox(
                label="Use Alignment (for variable missing lengths) - Requires anarci package",
                value=False
            )
            restore_btn = gr.Button("🔄 Restore Sequences", variant="primary")
        # Output column: the HTML components have empty labels, so Markdown
        # headings styled with `.output-label` act as their captions.
        with gr.Column():
            gr.Markdown("### 🧬 Restored Sequences", elem_classes=["restored-heading"])
            gr.Markdown("*Green highlighting shows restored residues*")
            gr.Markdown("**Heavy Chain Sequence**", elem_classes=["output-label"])
            heavy_output = gr.HTML(label="")
            gr.Markdown("**Light Chain Sequence**", elem_classes=["output-label"])
            light_output = gr.HTML(label="")
    # Example sequences
    # Each example row is [heavy, light], matching `inputs` below: the rows
    # cover both chains, heavy only, and light only.
    gr.Examples(
        examples=[
            [
                "EVQ***SGGEVKKPGASVKVSCRASGYTFRNYGLTWVRQAPGQGLEWMGWISAYNGNTNYAQKFQGRVTLTTDTSTSTAYMELRSLRSDDTAVYFCAR**PGHGAAFMDVWGTGTTVTVSS",
                "DIQLTQSPLSLPVTLGQPASISCRSS*SLEASDTNIYLSWFQQRPGQSPRRLIYKI*NRDSGVPDRFSGSGSGTHFTLRISRVEADDVAVYYCMQGTHWPPAFGQGTKVDIK"
            ],
            [
                "EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYAMGWVRQAPGKGLEWVSAISGSGGSTYYADSVKGRFTISRDNSKNTLYLQMNSLRAEDTAVYYCARDY**GMDVWGQGTTVTVSS",
                ""
            ],
            [
                "",
                "DIQLTQSPSSLSASVGDRVTITCRASQSISSYLNWYQQKPGKAPKLLIY*ASSLQSGVPSRFSGSGSGTDFTLTISSLQPEDFATYYCQQSYSTP*TFGQGTKVEIK"
            ]
        ],
        inputs=[heavy_input, light_input],
        label="Example Sequences"
    )
    # Connect the button to the function
    # The three inputs are passed positionally as (heavy_chain, light_chain,
    # use_align); two output components are bound, so the handler is expected
    # to return one value for each of them.
    restore_btn.click(
        fn=restore_sequences,
        inputs=[heavy_input, light_input, align_checkbox],
        outputs=[heavy_output, light_output]
    )
    # Footer note.
    gr.Markdown("""
---
**Note:** This app uses the AbLang2 model from Hugging Face Hub.
The restoration process may take a few seconds depending on sequence length and complexity.
""")
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()