# Source: DiffAb / abnumber/alignment.py (uploaded by luost26, commit "Update", 6d34920)
from typing import Union
from abnumber.common import is_similar_residue, is_integer
from abnumber.position import Position
class Alignment:
    """Antibody chain alignment of two or more chains

    >>> from abnumber import Chain
    >>>
    >>> seq1 = 'QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSSAKTTAP'
    >>> chain1 = Chain(seq1, scheme='imgt')
    >>>
    >>> seq2 = 'QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDYLDRWGQGTTLTVSSAKTTAP'
    >>> chain2 = Chain(seq2, scheme='imgt')
    >>> alignment = chain1.align(chain2)

    Alignment can be sliced and iterated:

    >>> for pos, (aa, bb) in alignment[:'5']:
    ...     print(pos, aa, bb)
    H1 Q Q
    H2 V V
    H3 Q Q
    H4 L L
    H5 Q V
    ...
    """

    def __init__(self, positions, residues, scheme, chain_type):
        """Store aligned positions and the residues found at each of them.

        :param positions: list of position objects, one per aligned column
        :param residues: sequence of the same length as ``positions``; each item holds
                         the residues of all aligned chains at that column
        :param scheme: numbering scheme, passed to :meth:`Position.from_string` when parsing string keys
        :param chain_type: chain type, passed to :meth:`Position.from_string` when parsing string keys
        """
        assert isinstance(positions, list), 'Expected list of positions and residues. ' \
                                            'Use chain.align(other) to create an alignment.'
        assert len(positions) == len(residues)
        unique_cdr_definitions = set(pos.cdr_definition for pos in positions)
        assert len(unique_cdr_definitions) <= 1, f'Aligned chains should use the same CDR definitions, got: {unique_cdr_definitions}'
        self.positions = positions
        self.residues = residues
        self.scheme = scheme
        self.chain_type = chain_type
        # Cached (position, residues) pairs used for iteration
        self._zipped = list(zip(self.positions, self.residues))

    def __repr__(self):
        return self.format()

    def __iter__(self):
        """Iterate over ``(position, residues)`` pairs."""
        yield from self._zipped

    def __len__(self):
        """Number of aligned positions (columns)."""
        return len(self.positions)

    def __getitem__(self, item):
        """Return a sliced :class:`Alignment` for a slice key, or the residues at a single position.

        :raises IndexError: when a slice step other than 1 is requested or the key cannot be parsed
        """
        if isinstance(item, slice):
            if item.step is not None and item.step != 1:
                raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')
            return self.slice(start=item.start, stop=item.stop)
        pos = self._parse_position(item)
        raw_pos = self.positions.index(pos)
        return self.residues[raw_pos]

    def slice(self, start: Union[str, int, 'Position'] = None, stop: Union[str, int, 'Position'] = None,
              stop_inclusive: bool = True, allow_raw: bool = False):
        """Create a slice of this alignment

        You can also slice directly using ``alignment['111':'112A']`` or ``alignment.raw[10:20]``.

        :param start: Slice start position (inclusive), :class:`Position` or string (e.g. '111A')
        :param stop: Slice stop position, :class:`Position` or string (e.g. '112A');
                     inclusive unless ``stop_inclusive`` is False
        :param stop_inclusive: Include stop position in slice
        :param allow_raw: Allow unaligned numeric indexing from 0 to length of sequence - 1
        :return: new sliced Alignment object
        """
        if start is not None:
            start = self._parse_position(start, allow_raw=allow_raw)
            if start is None:
                # _parse_position returns None only for a raw index at or past the end.
                # A start beyond the last position selects nothing; previously the None
                # was treated as "no lower bound" and the whole alignment was returned.
                return Alignment(positions=[], residues=[], scheme=self.scheme, chain_type=self.chain_type)
        if stop is not None:
            # A None result here (raw stop past the end) correctly means "no upper bound"
            stop = self._parse_position(stop, allow_raw=allow_raw)

        new_positions = []
        new_residues = []
        for pos, residues in zip(self.positions, self.residues):
            if start is not None and pos < start:
                continue
            if stop is not None and (pos > stop or (not stop_inclusive and pos >= stop)):
                break
            new_positions.append(pos)
            new_residues.append(residues)
        return Alignment(positions=new_positions, residues=new_residues, scheme=self.scheme, chain_type=self.chain_type)

    def _parse_position(self, position: Union[int, str, 'Position'], allow_raw=False):
        """Create :class:`Position` key object from string or int.

        Note: The position should only be used for indexing, CDR definition is not preserved!

        :param position: Numeric or string position representation
        :param allow_raw: Also allow unaligned numeric (int) indexing from 0 to length of sequence - 1
        :return: Position object that should only be used for indexing (CDR definition is not preserved),
                 or None when a raw numeric index is at or past the end of the alignment
        :raises IndexError: when the key is not a Position, string or integer,
                            or when it is an integer and ``allow_raw`` is False
        """
        if isinstance(position, str):
            return Position.from_string(position, chain_type=self.chain_type, scheme=self.scheme)
        if isinstance(position, Position):
            return position
        try:
            position = int(position)
        except TypeError as err:
            raise IndexError(f'Invalid position key, expected Position, string or integer, got {type(position)}: "{position}"') from err
        if not allow_raw:
            raise IndexError("Use chain.raw[i] for raw numeric indexing or pass allow_raw=True. "
                             "For named position indexing, use string (e.g. chain['111A'] or chain['H111A'])")
        if position >= len(self.positions):
            return None
        return self.positions[position]

    def format(self, mark_identity=True, mark_cdrs=True):
        """Format alignment to string

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        :return: formatted string (empty string for an alignment without positions)
        """
        if not self.positions:
            # Nothing to render; also keeps repr() of an empty slice from raising IndexError
            return ''

        def _identity_symbol(a, b):
            return '|' if a == b else ('+' if is_similar_residue(a, b) else '.')

        lines = []
        for i in range(len(self.residues[0])):
            if mark_identity and i != 0:
                # Middle line compares each chain with the chain printed directly above it
                lines.append(''.join(_identity_symbol(aas[i], aas[i-1]) for aas in self.residues))
            lines.append(''.join(aas[i] for aas in self.residues))
        if mark_cdrs:
            if self.positions[0].cdr_definition == 'kabat':
                # Under the kabat CDR definition, vernier positions are marked as well
                lines.append(''.join('^' if pos.is_in_cdr() else ("°" if pos.is_in_vernier() else ' ') for pos in self.positions))
            else:
                lines.append(''.join('^' if pos.is_in_cdr() else ' ' for pos in self.positions))
        return '\n'.join(lines)

    def print(self, mark_identity=True, mark_cdrs=True):
        """Print string representation of alignment created using :meth:`Alignment.format`

        >>> alignment.print()
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        ||||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+|||||||||||||||||....||.|||||||||||
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS
                                 ^^^^^^^^                 ^^^^^^^^^                                      ^^^^^^^^^^^^
        >>> alignment.print(mark_identity=False, mark_cdrs=False)
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        """
        print(self.format(mark_identity=mark_identity, mark_cdrs=mark_cdrs))

    def has_mutation(self):
        """Check if there is a mutation in the alignment or not"""
        return any(len(set(aas)) != 1 for aas in self.residues)

    def num_mutations(self):
        """Get number of mutations (positions with more than one type of residue)"""
        return sum(len(set(aas)) != 1 for aas in self.residues)

    @property
    def raw(self):
        """Access raw representation of this alignment to allow unaligned numeric indexing and slicing

        Numbering of ``alignment.raw`` starts at 0:

        - ``alignment.raw[0]`` returns the residues stored at the first aligned position.
        - ``alignment.raw[0:10]`` returns a new :class:`Alignment`; the end is exclusive (Python style).
        - ``alignment['1':'10']`` slices by scheme numbering instead; there the end is inclusive.

        :return: Raw alignment accessor that can be sliced or indexed
        """
        return RawAlignmentAccessor(self)
class RawAlignmentAccessor:
    """Helper exposing 0-based integer indexing and slicing over an :class:`Alignment`.

    Integer keys return the residues stored at that index; slice keys are
    forwarded to :meth:`Alignment.slice` with an exclusive stop.
    """

    def __init__(self, alignment: Alignment):
        # The wrapped alignment that all lookups are delegated to
        self.alignment = alignment

    def __getitem__(self, item):
        """Look up a single raw index or a raw ``start:stop`` range.

        :param item: integer index, or slice with integer (or omitted) bounds and step of 1
        :return: residues at the given index, or the result of :meth:`Alignment.slice` for a slice
        :raises IndexError: on a step other than 1 or on non-integer keys/bounds
        """
        if not isinstance(item, slice):
            # Single element: map the raw index to its position, then look that position up
            if not is_integer(item):
                raise IndexError(f'Expected int indexing for alignment.raw, got {type(item)}: {item}')
            position = self.alignment.positions[item]
            return self.alignment[position]

        if item.step is not None and item.step != 1:
            raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')

        start_is_valid = item.start is None or is_integer(item.start)
        if not start_is_valid:
            raise IndexError(f'Expected int start index for alignment.raw, got {type(item.start)}: {item.start}')

        stop_is_valid = item.stop is None or is_integer(item.stop)
        if not stop_is_valid:
            raise IndexError(f'Expected int end index for alignment.raw, got {type(item.stop)}: {item.stop}')

        # Raw slices use an exclusive end, unlike scheme-numbered slices
        return self.alignment.slice(
            start=item.start,
            stop=item.stop,
            stop_inclusive=False,
            allow_raw=True,
        )