from typing import Union from abnumber.common import is_similar_residue, is_integer from abnumber.position import Position class Alignment: """Antibody chain alignment of two or more chains >>> from abnumber import Chain >>> >>> seq1 = 'QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSSAKTTAP' >>> chain1 = Chain(seq1, scheme='imgt') >>> >>> seq2 = 'QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDYLDRWGQGTTLTVSSAKTTAP' >>> chain2 = Chain(seq2, scheme='imgt') >>> alignment = chain1.align(chain2) Alignment can be sliced and iterated: >>> for pos, (aa, bb) in alignment[:'5']: >>> print(pos, aa, bb) H1 Q Q H2 V V H3 Q Q H4 L L H5 Q V ... """ def __init__(self, positions, residues, scheme, chain_type): assert isinstance(positions, list), 'Expected list of positions and residues. ' \ 'Use chain.align(other) to create an alignment.' assert len(positions) == len(residues) unique_cdr_definitions = set(pos.cdr_definition for pos in positions) assert len(unique_cdr_definitions) <= 1, f'Aligned chains should use the same CDR definitions, got: {unique_cdr_definitions}' self.positions = positions self.residues = residues self.scheme = scheme self.chain_type = chain_type self._zipped = list(zip(self.positions, self.residues)) def __repr__(self): return self.format() def __iter__(self): yield from self._zipped.__iter__() def __len__(self): return len(self.positions) def __getitem__(self, item): if isinstance(item, slice): if item.step is not None and item.step != 1: raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}') return self.slice(start=item.start, stop=item.stop) pos = self._parse_position(item) raw_pos = self.positions.index(pos) return self.residues[raw_pos] def slice(self, start: Union[str, int, 'Position'] = None, stop: Union[str, int, 'Position'] = None, stop_inclusive: bool = True, allow_raw: bool = False): """Create a slice of this alignment You can also slice directly using ``alignment['111':'112A']`` or ``alignment.raw[10:20]``. :param start: Slice start position (inclusive), :class:`Position` or string (e.g. '111A') :param stop: Slice stop position (inclusive), :class:`Position` or string (e.g. '112A') :param stop_inclusive: Include stop position in slice :param allow_raw: Allow unaligned numeric indexing from 0 to length of sequence - 1 :return: new sliced Alignment object """ start = self._parse_position(start, allow_raw=allow_raw) if start is not None else None stop = self._parse_position(stop, allow_raw=allow_raw) if stop is not None else None new_positions = [] new_residues = [] for pos, residues in zip(self.positions, self.residues): if start is not None and pos < start: continue if stop is not None and (pos > stop or (not stop_inclusive and pos >= stop)): break new_positions.append(pos) new_residues.append(residues) return Alignment(positions=new_positions, residues=new_residues, scheme=self.scheme, chain_type=self.chain_type) def _parse_position(self, position: Union[int, str, 'Position'], allow_raw=False): """Create :class:`Position` key object from string or int. Note: The position should only be used for indexing, CDR definition is not preserved! :param position: Numeric or string position representation :param allow_raw: Also allow unaligned numeric (int) indexing from 0 to length of sequence - 1 :return: new Position object, should only be used for indexing, CDR definition is not preserved! """ if isinstance(position, str): return Position.from_string(position, chain_type=self.chain_type, scheme=self.scheme) if isinstance(position, Position): return position try: position = int(position) except TypeError: raise IndexError(f'Invalid position key, expected Position, string or integer, got {type(position)}: "{position}"') if not allow_raw: raise IndexError("Use chain.raw[i] for raw numeric indexing or pass allow_raw=True. " "For named position indexing, use string (e.g. chain['111A'] or chain['H111A'])") if position >= len(self.positions): return None return self.positions[position] def format(self, mark_identity=True, mark_cdrs=True): """Format alignment to string :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``) :param mark_cdrs: Add line highlighting CDR regions using ``^`` :return: formatted string """ def _identity_symbol(a, b): return '|' if a == b else ('+' if is_similar_residue(a, b) else '.') lines = [] for i in range(len(self.residues[0])): if mark_identity and i != 0: lines.append(''.join(_identity_symbol(aas[i], aas[i-1]) for pos, aas in self)) lines.append(''.join(aas[i] for pos, aas in self)) if mark_cdrs: if self.positions[0].cdr_definition == 'kabat': lines.append(''.join('^' if pos.is_in_cdr() else ("°" if pos.is_in_vernier() else ' ') for pos in self.positions)) else: lines.append(''.join('^' if pos.is_in_cdr() else ' ' for pos in self.positions)) return '\n'.join(lines) def print(self, mark_identity=True, mark_cdrs=True): """Print string representation of alignment created using :meth:`Alignment.format` >>> alignment.print() QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS ||||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+|||||||||||||||||....||.||||||||||| QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS ^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^^ >>> alignment.print(mark_identity=False, mark_cdrs=False) QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``) :param mark_cdrs: Add line highlighting CDR regions using ``^`` """ print(self.format(mark_identity=mark_identity, mark_cdrs=mark_cdrs)) def has_mutation(self): """Check if there is a mutation in the alignment or not""" return any(len(set(aas)) != 1 for aas in self.residues) def num_mutations(self): """Get number of mutations (positions with more than one type of residue)""" return sum(len(set(aas)) != 1 for aas in self.residues) @property def raw(self): """Access raw representation of this alignment to allow unaligned numeric indexing and slicing >>> # Numbering of ``chain.raw`` starts at 0 >>> alignment.raw[0] 'H1' >>> # Slicing with string is based on schema numbering, the end is inclusive >>> chain['1':'10'] 'QVQLQQSGAE' >>> # Slicing with ``chain.raw`` starts at 0, the end is exclusive (Python style) >>> chain.raw[0:10] 'QVQLQQSGAE' :return: Raw alignment accessor that can be sliced or indexed to produce a new :class:`Alignment` object """ return RawAlignmentAccessor(self) class RawAlignmentAccessor: def __init__(self, alignment: Alignment): self.alignment = alignment def __getitem__(self, item): if isinstance(item, slice): if item.step is not None and item.step != 1: raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}') if item.start is not None and not is_integer(item.start): raise IndexError(f'Expected int start index for alignment.raw, got {type(item.start)}: {item.start}') if item.stop is not None and not is_integer(item.stop): raise IndexError(f'Expected int end index for alignment.raw, got {type(item.stop)}: {item.stop}') return self.alignment.slice(start=item.start, stop=item.stop, stop_inclusive=False, allow_raw=True) if not is_integer(item): raise IndexError(f'Expected int indexing for alignment.raw, got {type(item)}: {item}') pos = self.alignment.positions[item] return self.alignment[pos]