Spaces:
Runtime error
Runtime error
File size: 9,310 Bytes
6d34920 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
from typing import Union
from abnumber.common import is_similar_residue, is_integer
from abnumber.position import Position
class Alignment:
    """Antibody chain alignment of two or more chains

    >>> from abnumber import Chain
    >>>
    >>> seq1 = 'QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSSAKTTAP'
    >>> chain1 = Chain(seq1, scheme='imgt')
    >>>
    >>> seq2 = 'QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDYLDRWGQGTTLTVSSAKTTAP'
    >>> chain2 = Chain(seq2, scheme='imgt')
    >>> alignment = chain1.align(chain2)

    Alignment can be sliced and iterated:

    >>> for pos, (aa, bb) in alignment[:'5']:
    ...     print(pos, aa, bb)
    H1 Q Q
    H2 V V
    H3 Q Q
    H4 L L
    H5 Q V
    ...
    """

    def __init__(self, positions, residues, scheme, chain_type):
        """Store aligned positions together with the residues found at each of them.

        :param positions: list of position objects; each must expose ``cdr_definition``
        :param residues: sequence of the same length as ``positions``; entry ``k`` holds
                         one residue per aligned chain for ``positions[k]``
        :param scheme: numbering scheme, passed on when parsing string positions
        :param chain_type: chain type, passed on when parsing string positions
        """
        assert isinstance(positions, list), 'Expected list of positions and residues. ' \
                                            'Use chain.align(other) to create an alignment.'
        assert len(positions) == len(residues)
        unique_cdr_definitions = set(pos.cdr_definition for pos in positions)
        assert len(unique_cdr_definitions) <= 1, f'Aligned chains should use the same CDR definitions, got: {unique_cdr_definitions}'
        self.positions = positions
        self.residues = residues
        self.scheme = scheme
        self.chain_type = chain_type
        # Pre-computed (position, residues) pairs served by __iter__
        self._zipped = list(zip(self.positions, self.residues))

    def __repr__(self):
        return self.format()

    def __iter__(self):
        """Iterate over ``(position, residues)`` pairs."""
        yield from iter(self._zipped)

    def __len__(self):
        """Number of aligned positions."""
        return len(self.positions)

    def __getitem__(self, item):
        """Get residues at a named position, or a sub-alignment for a slice.

        :param item: :class:`Position`, position string (e.g. ``'111A'``) or a slice of those
        :return: residues stored for the position, or a new :class:`Alignment` for a slice
        :raises IndexError: for a slice step other than 1 or for an integer key
        :raises ValueError: when the position is not part of this alignment
        """
        if isinstance(item, slice):
            if item.step is not None and item.step != 1:
                raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')
            return self.slice(start=item.start, stop=item.stop)
        pos = self._parse_position(item)
        raw_pos = self.positions.index(pos)
        return self.residues[raw_pos]

    def slice(self, start: Union[str, int, 'Position'] = None, stop: Union[str, int, 'Position'] = None,
              stop_inclusive: bool = True, allow_raw: bool = False):
        """Create a slice of this alignment

        You can also slice directly using ``alignment['111':'112A']`` or ``alignment.raw[10:20]``.

        :param start: Slice start position (inclusive), :class:`Position` or string (e.g. '111A')
        :param stop: Slice stop position (inclusive unless ``stop_inclusive`` is False),
                     :class:`Position` or string (e.g. '112A')
        :param stop_inclusive: Include stop position in slice
        :param allow_raw: Allow unaligned numeric indexing from 0 to length of sequence - 1
        :return: new sliced Alignment object
        """
        if start is not None:
            start = self._parse_position(start, allow_raw=allow_raw)
            if start is None:
                # A raw start index past the end selects nothing (Python slice semantics).
                # Previously the None was mistaken for "no lower bound" and the whole
                # alignment was returned.
                return Alignment(positions=[], residues=[], scheme=self.scheme, chain_type=self.chain_type)
        if stop is not None:
            # A raw stop index past the end resolves to None, i.e. "no upper bound"
            stop = self._parse_position(stop, allow_raw=allow_raw)

        new_positions = []
        new_residues = []
        for pos, residues in zip(self.positions, self.residues):
            if start is not None and pos < start:
                continue
            # The early exit presumably relies on positions being stored in ascending order
            if stop is not None and (pos > stop or (not stop_inclusive and pos >= stop)):
                break
            new_positions.append(pos)
            new_residues.append(residues)
        return Alignment(positions=new_positions, residues=new_residues, scheme=self.scheme, chain_type=self.chain_type)

    def _parse_position(self, position: Union[int, str, 'Position'], allow_raw=False):
        """Create :class:`Position` key object from string or int.

        Note: The position should only be used for indexing, CDR definition is not preserved!

        :param position: Numeric or string position representation
        :param allow_raw: Also allow unaligned numeric (int) indexing from 0 to length of sequence - 1
        :return: new Position object, should only be used for indexing, CDR definition is not preserved!
                 ``None`` when a raw index is greater than or equal to the number of positions
        :raises IndexError: for keys that are not a Position, string or integer,
                            or for integer keys when ``allow_raw`` is False
        """
        if isinstance(position, str):
            return Position.from_string(position, chain_type=self.chain_type, scheme=self.scheme)
        if isinstance(position, Position):
            return position
        try:
            position = int(position)
        except (TypeError, ValueError, OverflowError) as err:
            # int() raises ValueError/OverflowError for NaN/inf; report those as a bad key as well
            raise IndexError(f'Invalid position key, expected Position, string or integer, got {type(position)}: "{position}"') from err
        if not allow_raw:
            raise IndexError("Use chain.raw[i] for raw numeric indexing or pass allow_raw=True. "
                             "For named position indexing, use string (e.g. chain['111A'] or chain['H111A'])")
        if position >= len(self.positions):
            return None
        return self.positions[position]

    def format(self, mark_identity=True, mark_cdrs=True):
        """Format alignment to string

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        :return: formatted string, empty string for an alignment without positions
        """
        if not self.positions:
            # Nothing to render; also keeps repr() of an empty slice from raising IndexError
            return ''

        def _identity_symbol(a, b):
            return '|' if a == b else ('+' if is_similar_residue(a, b) else '.')

        lines = []
        # One output row per aligned chain; residues[k][i] is the residue of chain i at position k
        for i in range(len(self.residues[0])):
            if mark_identity and i != 0:
                # Identity row compares chain i with the chain printed right above it
                lines.append(''.join(_identity_symbol(aas[i], aas[i - 1]) for _, aas in self))
            lines.append(''.join(aas[i] for _, aas in self))
        if mark_cdrs:
            if self.positions[0].cdr_definition == 'kabat':
                # Vernier positions are additionally marked, only for the kabat CDR definition
                lines.append(''.join('^' if pos.is_in_cdr() else ("°" if pos.is_in_vernier() else ' ') for pos in self.positions))
            else:
                lines.append(''.join('^' if pos.is_in_cdr() else ' ' for pos in self.positions))
        return '\n'.join(lines)

    def print(self, mark_identity=True, mark_cdrs=True):
        """Print string representation of alignment created using :meth:`Alignment.format`

        >>> alignment.print()
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        ||||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+|||||||||||||||||....||.|||||||||||
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS
                                 ^^^^^^^^                 ^^^^^^^^^                                      ^^^^^^^^^^^^
        >>> alignment.print(mark_identity=False, mark_cdrs=False)
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        """
        print(self.format(mark_identity=mark_identity, mark_cdrs=mark_cdrs))

    def has_mutation(self):
        """Check if there is a mutation in the alignment or not"""
        return any(len(set(aas)) != 1 for aas in self.residues)

    def num_mutations(self):
        """Get number of mutations (positions with more than one type of residue)"""
        return sum(len(set(aas)) != 1 for aas in self.residues)

    @property
    def raw(self):
        """Access raw representation of this alignment to allow unaligned numeric indexing and slicing

        >>> # Numbering of ``alignment.raw`` starts at 0; an int index returns the
        >>> # residues stored for that position (same value as ``alignment[position]``)
        >>> first_residues = alignment.raw[0]
        >>> # Slicing with strings is based on scheme numbering, the end is inclusive
        >>> first_ten = alignment['1':'10']
        >>> # Slicing with ``alignment.raw`` starts at 0, the end is exclusive (Python style)
        >>> first_ten = alignment.raw[0:10]

        :return: Raw alignment accessor; slicing it produces a new :class:`Alignment` object,
                 indexing it with an int produces the residues at that raw index
        """
        return RawAlignmentAccessor(self)
class RawAlignmentAccessor:
    """Accessor that indexes and slices an :class:`Alignment` by raw (0-based) integer offsets."""

    def __init__(self, alignment: Alignment):
        # The wrapped alignment; all lookups are delegated to it
        self.alignment = alignment

    def __getitem__(self, item):
        """Resolve an int index or an int slice against the wrapped alignment.

        :param item: int index, or slice with int (or missing) bounds and a step of 1
        :return: for an int, the value of ``alignment[position]`` at that raw index;
                 for a slice, the result of ``alignment.slice`` with an exclusive stop
        :raises IndexError: for a non-int index, non-int slice bounds or a step other than 1
        """
        if not isinstance(item, slice):
            # Plain index: translate the raw offset into its position, then look that up
            if is_integer(item):
                position = self.alignment.positions[item]
                return self.alignment[position]
            raise IndexError(f'Expected int indexing for alignment.raw, got {type(item)}: {item}')

        step = item.step
        if not (step is None or step == 1):
            raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')

        # Both bounds are optional, but when given they must be integers
        for label, bound in (('start', item.start), ('end', item.stop)):
            if bound is not None and not is_integer(bound):
                raise IndexError(f'Expected int {label} index for alignment.raw, got {type(bound)}: {bound}')

        # Raw slicing follows Python conventions, hence the exclusive stop
        return self.alignment.slice(start=item.start, stop=item.stop, stop_inclusive=False, allow_raw=True)
|