# Page-header residue from extraction ("Spaces: Runtime error") -- not part of the module.
import copy | |
from typing import List, Union | |
from abnumber.common import _validate_chain_type, SCHEME_POSITION_TO_REGION, SCHEME_VERNIER, POS_REGEX | |
class Position:
    """Numbered position using a given numbering scheme

    Used as a key to store Position -> Amino acid information.

    Position objects are sortable according to the schema simply using ``sorted()``.
    Equality and hashing use the Heavy/Light prefix, number, letter and scheme
    (see :meth:`_key`); ordering uses :meth:`_sort_key`.
    """

    def __init__(self, chain_type: str, number: int, letter: str, scheme: str):
        """Create a position.

        :param chain_type: chain type, checked with ``_validate_chain_type``
        :param number: position number, converted with ``int()``
        :param letter: insertion letter, surrounding whitespace is stripped (may be empty)
        :param scheme: numbering scheme name, also used as the initial CDR definition
        """
        _validate_chain_type(chain_type)
        self.chain_type: str = chain_type
        self.number: int = int(number)
        self.letter: str = letter.strip()
        self.scheme: str = scheme
        # Until set_cdr_definition() is called, regions are looked up using the
        # numbering scheme itself and this position's own number.
        self.cdr_definition: str = self.scheme
        self.cdr_definition_position: int = self.number

    def copy(self):
        """Return a shallow copy of this position."""
        return copy.copy(self)

    def _key(self):
        """Return the tuple used for equality and hashing."""
        # Note: We are not including chain_type, but just Heavy/Light flag,
        # to keep Kappa and Lambda chain positions equal
        return self.chain_type_prefix(), self.number, self.letter, self.scheme

    def __repr__(self):
        return f'{self.chain_type_prefix()}{self.number}{self.letter} ({self.scheme})'

    def __str__(self):
        return self.format()

    def set_cdr_definition(self, cdr_definition: str, cdr_definition_position: int):
        """Set the CDR definition and the position number within that definition.

        Both values are used by :meth:`get_region` and :meth:`is_in_vernier`.

        :param cdr_definition: name of the CDR definition, must not be None
        :param cdr_definition_position: position number in that definition, must not be None
        """
        assert cdr_definition is not None, 'cdr_definition is required'
        assert cdr_definition_position is not None, 'cdr_definition_position is required'
        self.cdr_definition = cdr_definition
        self.cdr_definition_position = cdr_definition_position

    def format(self, chain_type=True, region=False, rjust=False, ljust=False, fillchar=' '):
        """Format Position to string

        :param chain_type: Add chain type prefix (H/L)
        :param region: Add region prefix (FR1, CDR1, ...)
        :param rjust: Align text to the right
        :param ljust: Align text to the left
        :param fillchar: Character to use for alignment padding
        :return: formatted string
        """
        formatted = f'{self.number}{self.letter}'
        if chain_type:
            formatted = f'{self.chain_type_prefix()}{formatted}'
        if region:
            formatted = f'{self.get_region()} {formatted}'
        # Padding width: 4 characters for number + letter, plus 1 for the chain
        # prefix and 5 for the region prefix including its trailing space.
        just = 4 + 1 * int(chain_type) + 5 * int(region)
        if rjust:
            formatted = formatted.rjust(just, fillchar)
        if ljust:
            formatted = formatted.ljust(just, fillchar)
        return formatted

    def __hash__(self):
        return self._key().__hash__()

    def __eq__(self, other):
        return isinstance(other, Position) and self._key() == other._key()

    def __ge__(self, other):
        # No __gt__ is defined; ``self > other`` resolves through the reflected
        # ``other < self`` when other is a Position.
        return self == other or self > other

    def __le__(self, other):
        return self == other or self < other

    def __lt__(self, other):
        """Compare two positions of the same chain (heavy vs. light) and scheme.

        :raises TypeError: when ``other`` is not a Position
        """
        if not isinstance(other, Position):
            raise TypeError(f'Cannot compare Position object with {type(other)}: {other}')
        assert self.is_heavy_chain() == other.is_heavy_chain(), f'Positions do not come from the same chain: {self}, {other}'
        assert self.scheme == other.scheme, 'Comparing positions in different schemes is not implemented'
        return self._sort_key() < other._sort_key()

    def chain_type_prefix(self):
        """Return ``"H"`` for heavy chain and ``"L"`` for both ``"K"`` and ``"L"`` chain types.

        :raises NotImplementedError: for any other chain type
        """
        if self.chain_type == 'H':
            return 'H'
        if self.chain_type in ['K', 'L']:
            return 'L'
        raise NotImplementedError(f'Unknown chain type "{self.chain_type}"')

    def _sort_key(self):
        """Return the tuple used for ordering positions within one scheme.

        :raises NotImplementedError: when the scheme is not one of imgt, chothia, kabat, aho
        """
        letter_ord = ord(self.letter) if self.letter else 0
        if self.scheme == 'imgt':
            if self.number in [33, 61, 112]:
                # letters at positions 33, 61 and 112 are sorted in reverse
                letter_ord = -letter_ord
        elif self.scheme in ['chothia', 'kabat', 'aho']:
            # all letters are sorted alphabetically for these schemes
            pass
        else:
            raise NotImplementedError(f'Cannot compare positions of scheme: {self.scheme}')
        return self.is_heavy_chain(), self.number, letter_ord

    def get_region(self):
        """Get string name of this position's region

        The lookup table is ``SCHEME_POSITION_TO_REGION[cdr_definition]`` when that
        key exists, otherwise ``SCHEME_POSITION_TO_REGION[f'{cdr_definition}_{chain_type}']``.

        :return: uppercase string, one of: ``"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"``
        """
        if self.cdr_definition in SCHEME_POSITION_TO_REGION:
            regions = SCHEME_POSITION_TO_REGION[self.cdr_definition]
        else:
            regions = SCHEME_POSITION_TO_REGION[f'{self.cdr_definition}_{self.chain_type}']
        return regions[self.cdr_definition_position]

    def is_in_cdr(self):
        """Check if given position is found in the CDR regions"""
        return self.get_region().lower().startswith('cdr')

    def is_in_vernier(self):
        """Check if this position is listed in ``SCHEME_VERNIER`` for its chain type.

        :raises NotImplementedError: when the CDR definition is not ``"kabat"``
        """
        if self.cdr_definition != 'kabat':
            raise NotImplementedError('Vernier zone identification is currently supported '
                                      f'only with Kabat CDR definitions, got: {self.cdr_definition}')
        return self.cdr_definition_position in SCHEME_VERNIER.get(f'{self.cdr_definition}_{self.chain_type}', [])

    @classmethod
    def from_string(cls, position, chain_type, scheme):
        """Create Position object from string, e.g. "H5"

        Note that Positions parsed from string do not support separate CDR definitions.

        :param position: position string, matched case-insensitively against ``POS_REGEX``
        :param chain_type: chain type, checked with ``_validate_chain_type``
        :param scheme: numbering scheme name
        :raises IndexError: when the string does not match, or its chain prefix
            disagrees with ``chain_type``
        """
        match = POS_REGEX.match(position.upper())
        _validate_chain_type(chain_type)
        expected_chain_prefix = 'H' if chain_type == 'H' else 'L'
        if match is None:
            raise IndexError(f'Expected position format chainNumberLetter '
                             f'(e.g. "{expected_chain_prefix}112A" or "112A"), got: "{position}"')
        chain_prefix, number, letter = match.groups()
        number = int(number)
        if chain_prefix and expected_chain_prefix != chain_prefix:
            raise IndexError(f'Use no prefix or "{expected_chain_prefix}" prefix for "{chain_type}" chain. '
                             f'Got: "{chain_prefix}".')
        return cls(chain_type=chain_type, number=number, letter=letter, scheme=scheme)

    def is_heavy_chain(self):
        """Return True when the chain type is ``"H"``."""
        return self.chain_type == 'H'

    def is_light_chain(self):
        """Return True when the chain type is ``"K"`` or ``"L"``."""
        # Tuple membership, not a substring test: ``x in 'KL'`` would also
        # accept '' and 'KL'.
        return self.chain_type in ('K', 'L')
def sort_positions(positions: List[str], chain_type: str, scheme: str) -> List:
    """Return the position strings ordered according to the given scheme.

    Either every input string starts with "H" or "L", or none does; the
    output strings carry the chain prefix only in the former case.
    """
    prefix_flags = [name.startswith(('H', 'L')) for name in positions]
    prefixed_count = sum(prefix_flags)
    # All-or-nothing: a mix of prefixed and unprefixed strings is rejected.
    assert prefixed_count in (0, len(prefix_flags)), 'Inconsistent position prefix'
    keep_prefix = prefixed_count == len(prefix_flags)
    ordered = sorted(
        Position.from_string(name, chain_type=chain_type, scheme=scheme)
        for name in positions
    )
    return [pos.format(chain_type=keep_prefix) for pos in ordered]