Spaces:
Runtime error
Runtime error
File size: 6,692 Bytes
6d34920 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import copy
from typing import List, Union
from abnumber.common import _validate_chain_type, SCHEME_POSITION_TO_REGION, SCHEME_VERNIER, POS_REGEX
class Position:
    """Numbered position using a given numbering scheme

    Used as a key to store Position -> Amino acid information.

    Position objects are sortable according to the schema simply using ``sorted()``.
    Equality and hashing are based on the Heavy/Light flag, number, insertion letter
    and scheme (see ``_key()``).
    """

    def __init__(self, chain_type: str, number: int, letter: str, scheme: str):
        """Create a position.

        :param chain_type: chain type, checked with ``_validate_chain_type``
        :param number: position number (converted using ``int()``)
        :param letter: insertion letter, surrounding whitespace is stripped; use ``''`` for none
        :param scheme: numbering scheme name
        """
        _validate_chain_type(chain_type)
        self.chain_type: str = chain_type
        self.number: int = int(number)
        self.letter: str = letter.strip()
        self.scheme: str = scheme
        # Until set_cdr_definition() is called, regions are resolved using the numbering scheme itself
        self.cdr_definition: str = self.scheme
        self.cdr_definition_position: int = self.number

    def copy(self):
        """Return a shallow copy of this position."""
        return copy.copy(self)

    def _key(self):
        """Return the tuple used for equality and hashing."""
        # Note: We are not including chain_type, but just Heavy/Light flag, to keep Kappa and Lambda chain positions equal
        return self.chain_type_prefix(), self.number, self.letter, self.scheme

    def __repr__(self):
        return f'{self.chain_type_prefix()}{self.number}{self.letter} ({self.scheme})'

    def __str__(self):
        return self.format()

    def set_cdr_definition(self, cdr_definition: str, cdr_definition_position: int):
        """Use a CDR definition that differs from the numbering scheme.

        :param cdr_definition: name of the CDR definition used by ``get_region()``
        :param cdr_definition_position: number of this position in the given CDR definition
        """
        assert cdr_definition is not None, 'cdr_definition is required'
        assert cdr_definition_position is not None, 'cdr_definition_position is required'
        self.cdr_definition = cdr_definition
        self.cdr_definition_position = cdr_definition_position

    def format(self, chain_type=True, region=False, rjust=False, ljust=False, fillchar=' '):
        """Format Position to string

        :param chain_type: Add chain type prefix (H/L)
        :param region: Add region prefix (FR1, CDR1, ...)
        :param rjust: Align text to the right
        :param ljust: Align text to the left
        :param fillchar: Character to use for alignment padding
        :return: formatted string
        """
        formatted = f'{self.number}{self.letter}'
        if chain_type:
            formatted = f'{self.chain_type_prefix()}{formatted}'
        if region:
            formatted = f'{self.get_region()} {formatted}'
        # Padding width: 4 characters for number + letter, 1 for the chain prefix, 5 for the region prefix
        just = 4 + 1 * int(chain_type) + 5 * int(region)
        if rjust:
            formatted = formatted.rjust(just, fillchar)
        if ljust:
            formatted = formatted.ljust(just, fillchar)
        return formatted

    def __hash__(self):
        return self._key().__hash__()

    def __eq__(self, other):
        return isinstance(other, Position) and self._key() == other._key()

    def __ge__(self, other):
        # __gt__ is not defined on this class; ``self > other`` is resolved through the reflected other.__lt__(self)
        return self == other or self > other

    def __le__(self, other):
        return self == other or self < other

    def __lt__(self, other):
        """Compare two positions of the same chain (heavy/light) and the same scheme.

        :raises TypeError: when ``other`` is not a Position
        :raises AssertionError: when the positions differ in heavy/light chain or in scheme
        """
        if not isinstance(other, Position):
            raise TypeError(f'Cannot compare Position object with {type(other)}: {other}')
        assert self.is_heavy_chain() == other.is_heavy_chain(), f'Positions do not come from the same chain: {self}, {other}'
        assert self.scheme == other.scheme, 'Comparing positions in different schemes is not implemented'
        return self._sort_key() < other._sort_key()

    def chain_type_prefix(self):
        """Return ``"H"`` for heavy chain and ``"L"`` for both ``"K"`` and ``"L"`` chain types.

        :raises NotImplementedError: for any other chain type
        """
        if self.chain_type == 'H':
            return 'H'
        if self.chain_type in ['K', 'L']:
            return 'L'
        raise NotImplementedError(f'Unknown chain type "{self.chain_type}"')

    def _sort_key(self):
        """Return the tuple used to order positions within one scheme.

        :raises NotImplementedError: for schemes other than imgt, chothia, kabat and aho
        """
        # A position without an insertion letter sorts before any lettered insertion
        letter_ord = ord(self.letter) if self.letter else 0
        if self.scheme == 'imgt':
            if self.number in [33, 61, 112]:
                # insertion letters at positions 33, 61 and 112 are sorted in reverse
                letter_ord = -letter_ord
        elif self.scheme in ['chothia', 'kabat', 'aho']:
            # all letters are sorted alphabetically for these schemes
            pass
        else:
            raise NotImplementedError(f'Cannot compare positions of scheme: {self.scheme}')
        return self.is_heavy_chain(), self.number, letter_ord

    def get_region(self):
        """Get string name of this position's region

        The region table is looked up in ``SCHEME_POSITION_TO_REGION`` under the CDR definition name,
        or under ``"{cdr_definition}_{chain_type}"`` when the plain name is not present.

        :return: uppercase string, one of: ``"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"``
        """
        if self.cdr_definition in SCHEME_POSITION_TO_REGION:
            regions = SCHEME_POSITION_TO_REGION[self.cdr_definition]
        else:
            regions = SCHEME_POSITION_TO_REGION[f'{self.cdr_definition}_{self.chain_type}']
        return regions[self.cdr_definition_position]

    def is_in_cdr(self):
        """Check if given position is found in the CDR regions"""
        return self.get_region().lower().startswith('cdr')

    def is_in_vernier(self):
        """Check if given position is listed in ``SCHEME_VERNIER`` for this chain type.

        :raises NotImplementedError: when the CDR definition is not ``"kabat"``
        """
        if self.cdr_definition != 'kabat':
            raise NotImplementedError('Vernier zone identification is currently supported '
                                      f'only with Kabat CDR definitions, got: {self.cdr_definition}')
        return self.cdr_definition_position in SCHEME_VERNIER.get(f'{self.cdr_definition}_{self.chain_type}', [])

    @classmethod
    def from_string(cls, position, chain_type, scheme):
        """Create Position object from string, e.g. "H5"

        Note that Positions parsed from string do not support separate CDR definitions.

        :param position: position string, matched against ``POS_REGEX`` after upper-casing
        :param chain_type: chain type, checked with ``_validate_chain_type``
        :param scheme: numbering scheme name
        :raises IndexError: when the string does not match the expected format
                            or its chain prefix does not agree with ``chain_type``
        """
        match = POS_REGEX.match(position.upper())
        _validate_chain_type(chain_type)
        expected_chain_prefix = 'H' if chain_type == 'H' else 'L'
        if match is None:
            raise IndexError(f'Expected position format chainNumberLetter '
                             f'(e.g. "{expected_chain_prefix}112A" or "112A"), got: "{position}"')
        chain_prefix, number, letter = match.groups()
        number = int(number)
        if chain_prefix and expected_chain_prefix != chain_prefix:
            raise IndexError(f'Use no prefix or "{expected_chain_prefix}" prefix for "{chain_type}" chain. '
                             f'Got: "{chain_prefix}".')
        return cls(chain_type=chain_type, number=number, letter=letter, scheme=scheme)

    def is_heavy_chain(self):
        """Return True when the chain type is ``"H"``."""
        return self.chain_type == 'H'

    def is_light_chain(self):
        """Return True when the chain type is ``"K"`` or ``"L"``."""
        # Tuple membership instead of ``in 'KL'``: a substring test would also accept '' and 'KL'
        return self.chain_type in ('K', 'L')
def sort_positions(positions: List[str], chain_type: str, scheme: str) -> List:
    """Return the given position strings ordered according to the numbering scheme.

    Either all or none of the strings may start with an ``H``/``L`` chain prefix;
    the returned strings carry the prefix only when the inputs did.

    :param positions: position strings, e.g. ``"H5"`` or ``"112A"``
    :param chain_type: chain type passed to ``Position.from_string``
    :param scheme: numbering scheme passed to ``Position.from_string``
    :return: list of formatted position strings in sorted order
    """
    prefix_flags = [name.startswith(('H', 'L')) for name in positions]
    prefixed = all(prefix_flags)
    # Mixed input (some prefixed, some not) is rejected
    assert prefixed or not any(prefix_flags), 'Inconsistent position prefix'
    ordered = sorted(
        Position.from_string(name, chain_type=chain_type, scheme=scheme)
        for name in positions
    )
    return [pos.format(chain_type=prefixed) for pos in ordered]
|