|
import re |
|
import sys |
|
from ast import literal_eval |
|
from functools import total_ordering |
|
from typing import NamedTuple, Sequence, Union |
|
|
|
|
|
|
|
|
|
# Characters that ``str.splitlines`` treats as line boundaries but that
# ``split_lines`` keeps inside a line: when a piece produced by
# ``str.splitlines`` ends with one of these, it is glued back onto the piece
# that follows it.
_NON_LINE_BREAKS = (
    '\v',  # Vertical Tab (0x0B)
    '\f',  # Form Feed (0x0C)
    '\x1C',  # File Separator
    '\x1D',  # Group Separator
    '\x1E',  # Record Separator
    '\x85',  # Next Line (NEL)
    '\u2028',  # Line Separator
    '\u2029',  # Paragraph Separator
)
|
|
|
|
|
class Version(NamedTuple):
    """Three-part version number, as returned by :func:`version_info`."""
    major: int  # first numeric component of the version string
    minor: int  # second numeric component
    micro: int  # third numeric component
|
|
|
|
|
def split_lines(string: str, keepends: bool = False) -> Sequence[str]:
    r"""
    Split Python source text into lines, breaking only on ``\n``, ``\r\n``
    and ``\r``.

    Unlike :py:meth:`str.splitlines`, form feeds and the other characters
    listed in ``_NON_LINE_BREAKS`` are treated as ordinary text and never end
    a line. Another difference: the result always has a final entry for the
    text after the last line break, so an empty input yields ``[""]`` and an
    input ending in a line break yields a trailing ``""``.

    :param string: The text to split.
    :param keepends: If true, every returned line keeps its line ending.
    :return: The list of lines.
    """
    if not keepends:
        return re.split(r'\n|\r\n|\r', string)

    pieces = string.splitlines(True)

    # ``str.splitlines`` also cut the text after form feeds and similar
    # characters. Remember every piece that ends with one of them so it can
    # be joined with its successor again.
    glue_points = [
        position
        for position, piece in enumerate(pieces)
        if piece and piece[-1] in _NON_LINE_BREAKS
    ]

    # Walk backwards so that deleting an entry never shifts an index that
    # still has to be processed.
    for position in reversed(glue_points):
        successor = position + 1
        # The very last piece has no successor; nothing to merge then.
        if successor < len(pieces):
            pieces[position] = pieces[position] + pieces[successor]
            del pieces[successor]

    # With keepends, ``str.splitlines`` does not emit the empty "line" that
    # follows a trailing line break (or an empty input); add it so both
    # modes return the same number of entries.
    if string == '' or string.endswith(('\n', '\r')):
        pieces.append('')
    return pieces
|
|
|
|
|
def python_bytes_to_unicode(
    source: Union[str, bytes], encoding: str = 'utf-8', errors: str = 'strict'
) -> str:
    """
    Decode Python source code to text, honouring a UTF-8 byte order mark and
    PEP 263 encoding declarations.

    :param source: The source code. A ``str`` is returned unchanged.
    :param encoding: Encoding used when the source carries neither a UTF-8
        BOM nor an encoding declaration. See :py:meth:`bytes.decode`.
    :param errors: Error handling scheme, see :py:meth:`bytes.decode`
        (``'strict'``, ``'replace'`` or ``'ignore'``).
    :return: The decoded source code.
    :raises LookupError: If the resolved encoding is unknown and ``errors``
        is not ``'replace'``.
    """
    if isinstance(source, str):
        # Already text; only bytes need decoding.
        return source

    def resolve_encoding():
        """
        Pick the encoding to decode ``source`` with: UTF-8 if it starts with
        a UTF-8 BOM, otherwise the name given in a ``coding[=:]`` declaration
        within the first two lines, otherwise the ``encoding`` argument.
        """
        if source.startswith(b'\xef\xbb\xbf'):
            return 'utf-8'

        # Only the first two newline-terminated lines are searched for a
        # declaration.
        header = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0)
        declaration = re.search(br"coding[=:]\s*([-\w.]+)", header)
        if declaration is None:
            # Nothing declared: fall back to the caller's default.
            return encoding
        # The pattern is a bytes pattern, so the captured name is bytes too.
        return str(declaration.group(1), 'ascii', 'replace')

    chosen = resolve_encoding()
    try:
        return str(source, chosen, errors)
    except LookupError:
        # The resolved encoding is not a known codec (for instance a
        # declaration such as ``# coding: foo-8``). Callers that asked for
        # lenient decoding get UTF-8 instead; everyone else sees the error.
        if errors != 'replace':
            raise
        return str(source, 'utf-8', errors)
|
|
|
|
|
def version_info() -> Version:
    """
    Return parso's own version as a :class:`Version` namedtuple, in the
    spirit of Python's ``sys.version_info``.
    """
    from parso import __version__

    # Break the version string into runs of digits and runs of lowercase
    # letters, e.g. "1.2.3" -> ["1", "2", "3"].
    components = re.findall(r'[a-z]+|\d+', __version__)

    fields = []
    for position, component in enumerate(components):
        # The component at index 3 is kept as text; all others become ints.
        # NOTE(review): ``Version`` declares only three fields, so a version
        # string with more than three components would make the constructor
        # raise TypeError - confirm whether that can occur.
        if position == 3:
            fields.append(component)
        else:
            fields.append(int(component))
    return Version(*fields)
|
|
|
|
|
class _PythonVersionInfo(NamedTuple):
    """Plain ``(major, minor)`` record that :class:`PythonVersionInfo` extends."""
    major: int
    minor: int


@total_ordering
class PythonVersionInfo(_PythonVersionInfo):
    """
    A ``(major, minor)`` Python version that can be compared with tuples.

    ``>``, ``==`` and ``!=`` against a tuple require that tuple to have
    exactly two items and raise :class:`ValueError` otherwise. Operands that
    are not tuples are delegated to the regular tuple protocol, so ``==``
    yields ``False`` and ``>`` raises :class:`TypeError`.

    NOTE(review): ``tuple`` already provides ``__lt__``, ``__le__`` and
    ``__ge__``, so ``total_ordering`` most likely leaves those inherited
    methods in place, which means they do not perform the length check.
    Confirm before relying on the check for those operators.
    """

    def __gt__(self, other):
        """
        Return whether this version is greater than ``other``.

        :raises ValueError: If ``other`` is a tuple whose length is not 2.
        """
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) > other
        # Not a tuple: hand the decision to the base class so that Python
        # receives its result (NotImplemented for foreign types) instead of
        # an unrelated truthy value.
        return super().__gt__(other)

    def __eq__(self, other):
        """
        Return whether this version equals ``other``.

        :raises ValueError: If ``other`` is a tuple whose length is not 2.
        """
        if isinstance(other, tuple):
            if len(other) != 2:
                raise ValueError("Can only compare to tuples of length 2.")
            return (self.major, self.minor) == other
        # Propagate the base class result rather than falling off the end
        # and returning None.
        return super().__eq__(other)

    def __ne__(self, other):
        """Return the negation of ``__eq__``, passing NotImplemented through."""
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            # Negating NotImplemented would hide it from the interpreter.
            return outcome
        return not outcome

    def __hash__(self):
        """
        Hash like the equivalent plain ``(major, minor)`` tuple.

        Defining ``__eq__`` in a class body disables the inherited
        ``__hash__``, so it is restored explicitly here; equal objects
        (including equal plain tuples) hash identically.
        """
        return hash((self.major, self.minor))
|
|
|
|
|
def _parse_version(version) -> PythonVersionInfo:
    """
    Turn a version string into a :class:`PythonVersionInfo`.

    Accepted forms are ``major``, ``major.minor`` (minor of one or two
    digits) and ``major.minor.patch``, each optionally followed by an
    ``a``/``b``/``rc`` marker plus one digit. Only major and minor are kept.
    When the minor part is missing it defaults to 7 for major 2 and to 6 for
    major 3.

    :raises ValueError: If the string does not have one of these forms.
    :raises NotImplementedError: If the minor part is missing and the major
        version is neither 2 nor 3.
    """
    parsed = re.match(r'(\d+)(?:\.(\d{1,2})(?:\.\d+)?)?((a|b|rc)\d)?$', version)
    if parsed is None:
        raise ValueError('The given version is not in the right format. '
                         'Use something like "3.8" or "3".')

    major = int(parsed.group(1))
    minor_text = parsed.group(2)
    if minor_text is None:
        # Only a major version was given: choose the default minor for it.
        minor_text = {2: "7", 3: "6"}.get(major)
        if minor_text is None:
            raise NotImplementedError("Sorry, no support yet for those fancy new/old versions.")
    return PythonVersionInfo(major, int(minor_text))
|
|
|
|
|
def parse_version_string(version: str = None) -> PythonVersionInfo:
    """
    Validate a version string such as `3.8`, `3.10.1` or `3` and return the
    matching :class:`PythonVersionInfo`, which holds the major and minor
    version only.

    :param version: The version string. ``None`` selects the version of the
        running interpreter.
    :raises TypeError: If ``version`` is neither ``None`` nor a string.
    """
    if version is None:
        # Default to the interpreter executing this code.
        running = sys.version_info[:2]
        version = '%s.%s' % running
    elif not isinstance(version, str):
        raise TypeError('version must be a string like "3.8"')

    return _parse_version(version)
|
|