| from __future__ import annotations |
|
|
| import contextlib |
| import re |
| from dataclasses import dataclass |
| from typing import Iterator, NoReturn |
|
|
| from .specifiers import Specifier |
|
|
|
|
| @dataclass |
| class Token: |
| name: str |
| text: str |
| position: int |
|
|
|
|
| class ParserSyntaxError(Exception): |
| """The provided source text could not be parsed correctly.""" |
|
|
| def __init__( |
| self, |
| message: str, |
| *, |
| source: str, |
| span: tuple[int, int], |
| ) -> None: |
| self.span = span |
| self.message = message |
| self.source = source |
|
|
| super().__init__() |
|
|
| def __str__(self) -> str: |
| marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" |
| return "\n ".join([self.message, self.source, marker]) |
|
|
|
|
| DEFAULT_RULES: dict[str, str | re.Pattern[str]] = { |
| "LEFT_PARENTHESIS": r"\(", |
| "RIGHT_PARENTHESIS": r"\)", |
| "LEFT_BRACKET": r"\[", |
| "RIGHT_BRACKET": r"\]", |
| "SEMICOLON": r";", |
| "COMMA": r",", |
| "QUOTED_STRING": re.compile( |
| r""" |
| ( |
| ('[^']*') |
| | |
| ("[^"]*") |
| ) |
| """, |
| re.VERBOSE, |
| ), |
| "OP": r"(===|==|~=|!=|<=|>=|<|>)", |
| "BOOLOP": r"\b(or|and)\b", |
| "IN": r"\bin\b", |
| "NOT": r"\bnot\b", |
| "VARIABLE": re.compile( |
| r""" |
| \b( |
| python_version |
| |python_full_version |
| |os[._]name |
| |sys[._]platform |
| |platform_(release|system) |
| |platform[._](version|machine|python_implementation) |
| |python_implementation |
| |implementation_(name|version) |
| |extras? |
| |dependency_groups |
| )\b |
| """, |
| re.VERBOSE, |
| ), |
| "SPECIFIER": re.compile( |
| Specifier._operator_regex_str + Specifier._version_regex_str, |
| re.VERBOSE | re.IGNORECASE, |
| ), |
| "AT": r"\@", |
| "URL": r"[^ \t]+", |
| "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", |
| "VERSION_PREFIX_TRAIL": r"\.\*", |
| "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", |
| "WS": r"[ \t]+", |
| "END": r"$", |
| } |
|
|
|
|
| class Tokenizer: |
| """Context-sensitive token parsing. |
| |
| Provides methods to examine the input stream to check whether the next token |
| matches. |
| """ |
|
|
| def __init__( |
| self, |
| source: str, |
| *, |
| rules: dict[str, str | re.Pattern[str]], |
| ) -> None: |
| self.source = source |
| self.rules: dict[str, re.Pattern[str]] = { |
| name: re.compile(pattern) for name, pattern in rules.items() |
| } |
| self.next_token: Token | None = None |
| self.position = 0 |
|
|
| def consume(self, name: str) -> None: |
| """Move beyond provided token name, if at current position.""" |
| if self.check(name): |
| self.read() |
|
|
| def check(self, name: str, *, peek: bool = False) -> bool: |
| """Check whether the next token has the provided name. |
| |
| By default, if the check succeeds, the token *must* be read before |
| another check. If `peek` is set to `True`, the token is not loaded and |
| would need to be checked again. |
| """ |
| assert self.next_token is None, ( |
| f"Cannot check for {name!r}, already have {self.next_token!r}" |
| ) |
| assert name in self.rules, f"Unknown token name: {name!r}" |
|
|
| expression = self.rules[name] |
|
|
| match = expression.match(self.source, self.position) |
| if match is None: |
| return False |
| if not peek: |
| self.next_token = Token(name, match[0], self.position) |
| return True |
|
|
| def expect(self, name: str, *, expected: str) -> Token: |
| """Expect a certain token name next, failing with a syntax error otherwise. |
| |
| The token is *not* read. |
| """ |
| if not self.check(name): |
| raise self.raise_syntax_error(f"Expected {expected}") |
| return self.read() |
|
|
| def read(self) -> Token: |
| """Consume the next token and return it.""" |
| token = self.next_token |
| assert token is not None |
|
|
| self.position += len(token.text) |
| self.next_token = None |
|
|
| return token |
|
|
| def raise_syntax_error( |
| self, |
| message: str, |
| *, |
| span_start: int | None = None, |
| span_end: int | None = None, |
| ) -> NoReturn: |
| """Raise ParserSyntaxError at the given position.""" |
| span = ( |
| self.position if span_start is None else span_start, |
| self.position if span_end is None else span_end, |
| ) |
| raise ParserSyntaxError( |
| message, |
| source=self.source, |
| span=span, |
| ) |
|
|
| @contextlib.contextmanager |
| def enclosing_tokens( |
| self, open_token: str, close_token: str, *, around: str |
| ) -> Iterator[None]: |
| if self.check(open_token): |
| open_position = self.position |
| self.read() |
| else: |
| open_position = None |
|
|
| yield |
|
|
| if open_position is None: |
| return |
|
|
| if not self.check(close_token): |
| self.raise_syntax_error( |
| f"Expected matching {close_token} for {open_token}, after {around}", |
| span_start=open_position, |
| ) |
|
|
| self.read() |
|
|