import copy
from typing import List, Union

from abnumber.common import _validate_chain_type, SCHEME_POSITION_TO_REGION, SCHEME_VERNIER, POS_REGEX


class Position:
    """Numbered position using a given numbering scheme

    Used as a key to store Position -> Amino acid information.

    Position objects are sortable according to the schema simply using ``sorted()``.
    """

    def __init__(self, chain_type: str, number: int, letter: str, scheme: str):
        """Create a position

        :param chain_type: chain type, checked using ``_validate_chain_type``
        :param number: position number, converted using ``int()``
        :param letter: insertion letter, surrounding whitespace is stripped (empty string for no letter)
        :param scheme: numbering scheme name
        """
        _validate_chain_type(chain_type)
        self.chain_type: str = chain_type
        self.number: int = int(number)
        self.letter: str = letter.strip()
        self.scheme: str = scheme
        # Until set_cdr_definition() is called, regions are looked up using the numbering scheme itself
        self.cdr_definition: str = self.scheme
        self.cdr_definition_position: int = self.number

    def copy(self) -> 'Position':
        """Return a shallow copy of this position"""
        return copy.copy(self)

    def _key(self):
        """Return the tuple used for equality and hashing"""
        # Note: We are not including chain_type, but just Heavy/Light flag, to keep Kappa and Lambda chain positions equal
        return self.chain_type_prefix(), self.number, self.letter, self.scheme

    def __repr__(self) -> str:
        return f'{self.chain_type_prefix()}{self.number}{self.letter} ({self.scheme})'

    def __str__(self) -> str:
        return self.format()

    def set_cdr_definition(self, cdr_definition: str, cdr_definition_position: int):
        """Set the CDR definition used by ``get_region()`` and ``is_in_vernier()``

        :param cdr_definition: CDR definition name, must not be None
        :param cdr_definition_position: number of this position in the CDR definition, must not be None
        :raises AssertionError: when one of the arguments is None
        """
        # Raised explicitly (instead of using ``assert``) so that the checks are not stripped by ``python -O``;
        # the AssertionError type is kept for backwards compatibility
        if cdr_definition is None:
            raise AssertionError('cdr_definition is required')
        if cdr_definition_position is None:
            raise AssertionError('cdr_definition_position is required')
        self.cdr_definition = cdr_definition
        self.cdr_definition_position = cdr_definition_position

    def format(self, chain_type=True, region=False, rjust=False, ljust=False, fillchar=' ') -> str:
        """Format Position to string

        :param chain_type: Add chain type prefix (H/L)
        :param region: Add region prefix (FR1, CDR1, ...)
        :param rjust: Align text to the right
        :param ljust: Align text to the left
        :param fillchar: Character to use for alignment padding
        :return: formatted string
        """
        formatted = f'{self.number}{self.letter}'
        if chain_type:
            formatted = f'{self.chain_type_prefix()}{formatted}'
        if region:
            formatted = f'{self.get_region()} {formatted}'
        # Padding width: 4 characters for number and letter, plus 1 for the chain prefix, plus 5 for the region prefix
        just = 4 + 1 * int(chain_type) + 5 * int(region)
        if rjust:
            formatted = formatted.rjust(just, fillchar)
        if ljust:
            formatted = formatted.ljust(just, fillchar)
        return formatted

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        return isinstance(other, Position) and self._key() == other._key()

    def __ge__(self, other):
        return self == other or self > other

    def __le__(self, other):
        return self == other or self < other

    def __lt__(self, other):
        """Compare positions of the same chain (heavy/light) and the same scheme

        :raises TypeError: when ``other`` is not a Position
        :raises AssertionError: when the positions come from different chains or different schemes
        """
        if not isinstance(other, Position):
            raise TypeError(f'Cannot compare Position object with {type(other)}: {other}')
        # Raised explicitly (instead of using ``assert``) so that the checks are not stripped by ``python -O``;
        # the AssertionError type is kept for backwards compatibility
        if self.is_heavy_chain() != other.is_heavy_chain():
            raise AssertionError(f'Positions do not come from the same chain: {self}, {other}')
        if self.scheme != other.scheme:
            raise AssertionError('Comparing positions in different schemes is not implemented')
        return self._sort_key() < other._sort_key()

    def chain_type_prefix(self) -> str:
        """Get chain prefix: ``"H"`` for chain type H, ``"L"`` for chain types K and L

        :raises NotImplementedError: for any other chain type
        """
        if self.chain_type == 'H':
            return 'H'
        if self.chain_type in ['K', 'L']:
            return 'L'
        raise NotImplementedError(f'Unknown chain type "{self.chain_type}"')

    def _sort_key(self):
        """Return the tuple used to order positions within one scheme

        :raises NotImplementedError: when the scheme is not one of imgt, chothia, kabat, aho
        """
        # A position without a letter gets 0
        letter_ord = ord(self.letter) if self.letter else 0
        if self.scheme == 'imgt':
            if self.number in [33, 61, 112]:
                # letters at positions 33, 61 and 112 are sorted in reverse;
                # the position without a letter (0) then comes last
                letter_ord = -letter_ord
        elif self.scheme in ['chothia', 'kabat', 'aho']:
            # all letters are sorted alphabetically for these schemes
            pass
        else:
            raise NotImplementedError(f'Cannot compare positions of scheme: {self.scheme}')
        return self.is_heavy_chain(), self.number, letter_ord

    def get_region(self) -> str:
        """Get string name of this position's region

        :return: uppercase string, one of: ``"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"``
        """
        if self.cdr_definition in SCHEME_POSITION_TO_REGION:
            regions = SCHEME_POSITION_TO_REGION[self.cdr_definition]
        else:
            # Fall back to the chain-specific entry, e.g. "<cdr_definition>_<chain_type>"
            regions = SCHEME_POSITION_TO_REGION[f'{self.cdr_definition}_{self.chain_type}']
        return regions[self.cdr_definition_position]

    def is_in_cdr(self) -> bool:
        """Check if given position is found in the CDR regions"""
        return self.get_region().lower().startswith('cdr')

    def is_in_vernier(self) -> bool:
        """Check if given position is found in the Vernier zone

        :raises NotImplementedError: when the CDR definition is not ``"kabat"``
        """
        if self.cdr_definition != 'kabat':
            raise NotImplementedError('Vernier zone identification is currently supported '
                                      f'only with Kabat CDR definitions, got: {self.cdr_definition}')
        return self.cdr_definition_position in SCHEME_VERNIER.get(f'{self.cdr_definition}_{self.chain_type}', [])

    @classmethod
    def from_string(cls, position, chain_type, scheme) -> 'Position':
        """Create Position object from string, e.g. "H5"

        Note that Positions parsed from string do not support separate CDR definitions.

        :param position: position string, matched case-insensitively (it is upper-cased before parsing)
        :param chain_type: chain type, checked using ``_validate_chain_type``
        :param scheme: numbering scheme name
        :raises IndexError: when the string does not match the expected format
                            or when its chain prefix does not agree with ``chain_type``
        """
        match = POS_REGEX.match(position.upper())
        _validate_chain_type(chain_type)
        expected_chain_prefix = 'H' if chain_type == 'H' else 'L'
        if match is None:
            raise IndexError(f'Expected position format chainNumberLetter '
                             f'(e.g. "{expected_chain_prefix}112A" or "112A"), got: "{position}"')
        chain_prefix, number, letter = match.groups()
        number = int(number)
        if chain_prefix and expected_chain_prefix != chain_prefix:
            raise IndexError(f'Use no prefix or "{expected_chain_prefix}" prefix for "{chain_type}" chain. '
                             f'Got: "{chain_prefix}".')
        return cls(chain_type=chain_type, number=number, letter=letter, scheme=scheme)

    def is_heavy_chain(self) -> bool:
        """Check if this position comes from a heavy chain (chain type H)"""
        return self.chain_type == 'H'

    def is_light_chain(self) -> bool:
        """Check if this position comes from a light chain (chain type K or L)"""
        # Tuple membership: ``in 'KL'`` would be a substring test that also accepts '' and 'KL'
        return self.chain_type in ('K', 'L')


def sort_positions(positions: List[str], chain_type: str, scheme: str) -> List[str]:
    """Sort position strings to correct order based on given scheme

    :param positions: position strings, either all with a chain prefix (H/L) or all without it
    :param chain_type: chain type passed to ``Position.from_string``
    :param scheme: numbering scheme passed to ``Position.from_string``
    :return: sorted position strings, with chain prefix only if the input strings had one
    :raises AssertionError: when only some of the positions have a chain prefix
    """
    # Prefix is detected case-insensitively, same as Position.from_string which upper-cases its input
    has_prefix = [p.upper().startswith(('H', 'L')) for p in positions]
    # Raised explicitly (instead of using ``assert``) so that the check is not stripped by ``python -O``
    if any(has_prefix) and not all(has_prefix):
        raise AssertionError('Inconsistent position prefix')
    keep_prefix = all(has_prefix)
    position_objects = [Position.from_string(p, chain_type=chain_type, scheme=scheme) for p in positions]
    return [p.format(chain_type=keep_prefix) for p in sorted(position_objects)]