File size: 9,310 Bytes
6d34920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
from typing import Union

from abnumber.common import is_similar_residue, is_integer
from abnumber.position import Position


class Alignment:
    """Antibody chain alignment of two or more chains

    >>> from abnumber import Chain
    >>>
    >>> seq1 = 'QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSSAKTTAP'
    >>> chain1 = Chain(seq1, scheme='imgt')
    >>>
    >>> seq2 = 'QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDYLDRWGQGTTLTVSSAKTTAP'
    >>> chain2 = Chain(seq2, scheme='imgt')
    >>> alignment = chain1.align(chain2)

    Alignment can be sliced and iterated:

    >>> for pos, (aa, bb) in alignment[:'5']:
    >>>     print(pos, aa, bb)
    H1  Q Q
    H2  V V
    H3  Q Q
    H4  L L
    H5  Q V
    ...

    """
    def __init__(self, positions, residues, scheme, chain_type):
        assert isinstance(positions, list), 'Expected list of positions and residues. ' \
                                            'Use chain.align(other) to create an alignment.'
        assert len(positions) == len(residues)
        unique_cdr_definitions = set(pos.cdr_definition for pos in positions)
        assert len(unique_cdr_definitions) <= 1, f'Aligned chains should use the same CDR definitions, got: {unique_cdr_definitions}'
        self.positions = positions
        self.residues = residues
        self.scheme = scheme
        self.chain_type = chain_type
        self._zipped = list(zip(self.positions, self.residues))

    def __repr__(self):
        return self.format()

    def __iter__(self):
        yield from self._zipped.__iter__()

    def __len__(self):
        return len(self.positions)

    def __getitem__(self, item):
        if isinstance(item, slice):
            if item.step is not None and item.step != 1:
                raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')
            return self.slice(start=item.start, stop=item.stop)
        pos = self._parse_position(item)
        raw_pos = self.positions.index(pos)
        return self.residues[raw_pos]

    def slice(self, start: Union[str, int, 'Position'] = None, stop: Union[str, int, 'Position'] = None,
              stop_inclusive: bool = True, allow_raw: bool = False):
        """Create a slice of this alignment

        You can also slice directly using ``alignment['111':'112A']`` or ``alignment.raw[10:20]``.

        :param start: Slice start position (inclusive), :class:`Position` or string (e.g. '111A')
        :param stop: Slice stop position (inclusive), :class:`Position` or string (e.g. '112A')
        :param stop_inclusive: Include stop position in slice
        :param allow_raw: Allow unaligned numeric indexing from 0 to length of sequence - 1
        :return: new sliced Alignment object
        """

        start = self._parse_position(start, allow_raw=allow_raw) if start is not None else None
        stop = self._parse_position(stop, allow_raw=allow_raw) if stop is not None else None

        new_positions = []
        new_residues = []
        for pos, residues in zip(self.positions, self.residues):
            if start is not None and pos < start:
                continue
            if stop is not None and (pos > stop or (not stop_inclusive and pos >= stop)):
                break
            new_positions.append(pos)
            new_residues.append(residues)

        return Alignment(positions=new_positions, residues=new_residues, scheme=self.scheme, chain_type=self.chain_type)

    def _parse_position(self, position: Union[int, str, 'Position'], allow_raw=False):
        """Create :class:`Position` key object from string or int.

        Note: The position should only be used for indexing, CDR definition is not preserved!

        :param position: Numeric or string position representation
        :param allow_raw: Also allow unaligned numeric (int) indexing from 0 to length of sequence - 1
        :return: new Position object, should only be used for indexing, CDR definition is not preserved!
        """
        if isinstance(position, str):
            return Position.from_string(position, chain_type=self.chain_type, scheme=self.scheme)
        if isinstance(position, Position):
            return position
        try:
            position = int(position)
        except TypeError:
            raise IndexError(f'Invalid position key, expected Position, string or integer, got {type(position)}: "{position}"')
        if not allow_raw:
            raise IndexError("Use chain.raw[i] for raw numeric indexing or pass allow_raw=True. "
                             "For named position indexing, use string (e.g. chain['111A'] or chain['H111A'])")
        if position >= len(self.positions):
            return None
        return self.positions[position]

    def format(self, mark_identity=True, mark_cdrs=True):
        """Format alignment to string

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        :return: formatted string
        """

        def _identity_symbol(a, b):
            return '|' if a == b else ('+' if is_similar_residue(a, b) else '.')

        lines = []
        for i in range(len(self.residues[0])):
            if mark_identity and i != 0:
                lines.append(''.join(_identity_symbol(aas[i], aas[i-1]) for pos, aas in self))
            lines.append(''.join(aas[i] for pos, aas in self))
        if mark_cdrs:
            if self.positions[0].cdr_definition == 'kabat':
                lines.append(''.join('^' if pos.is_in_cdr() else ("°" if pos.is_in_vernier() else ' ') for pos in self.positions))
            else:
                lines.append(''.join('^' if pos.is_in_cdr() else ' ' for pos in self.positions))
        return '\n'.join(lines)

    def print(self, mark_identity=True, mark_cdrs=True):
        """Print string representation of alignment created using :meth:`Alignment.format`

        >>> alignment.print()
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        ||||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+|||||||||||||||||....||.|||||||||||
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS
                                 ^^^^^^^^                 ^^^^^^^^^                                      ^^^^^^^^^^^^
        >>> alignment.print(mark_identity=False, mark_cdrs=False)
        QVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDHYCLDYWGQGTTLTVSS
        QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYD--DYLDRWGQGTTLTVSS

        :param mark_identity: Add BLAST style middle line showing identity (``|``), similar residue (``+``) or different residue (``.``)
        :param mark_cdrs: Add line highlighting CDR regions using ``^``
        """
        print(self.format(mark_identity=mark_identity, mark_cdrs=mark_cdrs))

    def has_mutation(self):
        """Check if there is a mutation in the alignment or not"""
        return any(len(set(aas)) != 1 for aas in self.residues)

    def num_mutations(self):
        """Get number of mutations (positions with more than one type of residue)"""
        return sum(len(set(aas)) != 1 for aas in self.residues)

    @property
    def raw(self):
        """Access raw representation of this alignment to allow unaligned numeric indexing and slicing

        >>> # Numbering of ``chain.raw`` starts at 0
        >>> alignment.raw[0]
        'H1'
        >>> # Slicing with string is based on schema numbering, the end is inclusive
        >>> chain['1':'10']
        'QVQLQQSGAE'
        >>> # Slicing with ``chain.raw`` starts at 0, the end is exclusive (Python style)
        >>> chain.raw[0:10]
        'QVQLQQSGAE'
        :return: Raw alignment accessor that can be sliced or indexed to produce a new :class:`Alignment` object
        """
        return RawAlignmentAccessor(self)


class RawAlignmentAccessor:
    def __init__(self, alignment: Alignment):
        self.alignment = alignment

    def __getitem__(self, item):
        if isinstance(item, slice):
            if item.step is not None and item.step != 1:
                raise IndexError(f'Slicing with step != 1 is not implemented, got: {item}')
            if item.start is not None and not is_integer(item.start):
                raise IndexError(f'Expected int start index for alignment.raw, got {type(item.start)}: {item.start}')
            if item.stop is not None and not is_integer(item.stop):
                raise IndexError(f'Expected int end index for alignment.raw, got {type(item.stop)}: {item.stop}')
            return self.alignment.slice(start=item.start, stop=item.stop, stop_inclusive=False, allow_raw=True)
        if not is_integer(item):
            raise IndexError(f'Expected int indexing for alignment.raw, got {type(item)}: {item}')
        pos = self.alignment.positions[item]
        return self.alignment[pos]