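# NOTE(review): the text "Spaces: / Runtime error / Runtime error" preceded this
# module in the capture; it looks like hosting-page status residue rather than
# part of the source -- confirm and drop.
"""
    pygments.util
    ~~~~~~~~~~~~~

    Utility functions.

    :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""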
import re
from io import TextIOWrapper
# Separators between the pieces of a shebang line: forward slash, backslash
# (Windows paths) and space (interpreter arguments).
split_path_re = re.compile(r'[/\\ ]')

# Finds a ``<!DOCTYPE ...>`` declaration.  Group 1 captures the first name
# after ``DOCTYPE``, optionally followed by a second name and a double-quoted
# string, e.g. ``html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"``.
# The pattern is compiled with re.VERBOSE, so whitespace and ``#`` comments
# inside it are ignored -- but any stray ``|`` would be real alternation.
doctype_lookup_re = re.compile(r'''
    <!DOCTYPE\s+(
     [a-zA-Z_][a-zA-Z0-9]*
     (?: \s+      # optional in HTML5
     [a-zA-Z_][a-zA-Z0-9]*\s+
     "[^"]*")?
     )
     [^>]*>
''', re.DOTALL | re.MULTILINE | re.VERBOSE)

# Matches an opening tag (with optional attributes) that is followed somewhere
# later by any closing tag; DOTALL lets the content span several lines.
tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
                    re.IGNORECASE | re.DOTALL | re.MULTILINE)

# Matches an ``<?xml ... ?>`` declaration, allowing leading whitespace.
xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)
class ClassNotFound(ValueError):
    """Raised if one of the lookup functions didn't find a matching class.

    Derives from `ValueError`, so handlers written for `ValueError` catch
    it as well.
    """
class OptionError(Exception):
    """
    This exception will be raised by all option processing functions if
    the type or value of the argument is not correct.
    """
def get_choice_opt(options, optname, allowed, default=None, normcase=False):
    """
    Return the value of `optname` from the dictionary `options`, restricted
    to the values in the sequence `allowed`.

    If the key is absent, `default` is used in its place.  With `normcase`
    set, string values are lower-cased before being checked and returned;
    non-string values (for instance a ``None`` default) are checked as-is.

    Raises `OptionError` if the resulting value is not in `allowed`.
    """
    value = options.get(optname, default)
    # Only strings can be case-normalised: calling ``.lower()`` on a missing
    # option whose default is None would fail with AttributeError instead of
    # reaching the membership check below.
    if normcase and isinstance(value, str):
        value = value.lower()
    if value not in allowed:
        raise OptionError('Value for option %s must be one of %s' %
                          (optname, ', '.join(map(str, allowed))))
    return value
def get_bool_opt(options, optname, default=None):
    """
    Intuitively, this is `options.get(optname, default)`, but restricted to
    Boolean value. The Booleans can be represented as string, in order to accept
    Boolean value from the command line arguments. If the key `optname` is
    present in the dictionary `options` and is not associated with a Boolean,
    raise an `OptionError`. If it is absent, `default` is returned instead.

    Integers are accepted as well and converted with `bool()`.

    The valid string values for ``True`` are ``1``, ``yes``, ``true`` and
    ``on``, the ones for ``False`` are ``0``, ``no``, ``false`` and ``off``
    (matched case-insensitively).
    """
    value = options.get(optname, default)
    # bool is tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return value
    if isinstance(value, int):
        return bool(value)
    if not isinstance(value, str):
        raise OptionError('Invalid type %r for option %s; use '
                          '1/0, yes/no, true/false, on/off' % (
                              value, optname))
    lowered = value.lower()
    if lowered in ('1', 'yes', 'true', 'on'):
        return True
    if lowered in ('0', 'no', 'false', 'off'):
        return False
    raise OptionError('Invalid value %r for option %s; use '
                      '1/0, yes/no, true/false, on/off' % (
                          value, optname))
def get_int_opt(options, optname, default=None):
    """
    Return the value of `optname` from the dictionary `options` (or
    `default` if the key is absent) converted with `int()`.

    Raises `OptionError` if the value has a type `int()` cannot handle
    (for example ``None``) or is a string that is not an integer literal.
    """
    value = options.get(optname, default)
    try:
        return int(value)
    except TypeError as err:
        raise OptionError('Invalid type %r for option %s; you '
                          'must give an integer value' % (
                              value, optname)) from err
    except ValueError as err:
        raise OptionError('Invalid value %r for option %s; you '
                          'must give an integer value' % (
                              value, optname)) from err
def get_list_opt(options, optname, default=None):
    """
    If the key `optname` from the dictionary `options` is a string,
    split it at whitespace and return it. If it is already a list
    or a tuple, it is returned as a list.

    If the key is absent, `default` is processed the same way.  The result
    is always a new list.  Any other type raises `OptionError`.
    """
    value = options.get(optname, default)
    if isinstance(value, str):
        return value.split()
    if isinstance(value, (list, tuple)):
        return list(value)
    raise OptionError('Invalid type %r for option %s; you '
                      'must give a list value' % (
                          value, optname))
def docstring_headline(obj):
    """
    Return the first paragraph of `obj`'s docstring as a single line.

    The lines up to the first blank line are stripped and joined with single
    spaces.  An empty string is returned if `obj.__doc__` is empty or None.
    """
    doc = obj.__doc__
    if not doc:
        return ''
    headline = []
    for line in doc.strip().splitlines():
        stripped = line.strip()
        if not stripped:
            # A blank line ends the headline paragraph.
            break
        headline.append(stripped)
    return ' '.join(headline)
def make_analysator(f):
    """Return a static text analyser function that returns float values.

    The returned `staticmethod` wraps `f` so that it always yields a float
    between 0.0 and 1.0:

    - any exception raised by `f` gives 0.0,
    - a falsy result gives 0.0,
    - other results are converted with `float()` and clamped to the
      0.0 .. 1.0 range; results that cannot be converted give 0.0.

    The wrapper takes over the docstring of `f`.
    """
    def text_analyse(text):
        try:
            rv = f(text)
        except Exception:
            # Deliberately best-effort: a failing analyser counts as "no match".
            return 0.0
        if not rv:
            return 0.0
        try:
            return min(1.0, max(0.0, float(rv)))
        except (ValueError, TypeError):
            return 0.0
    text_analyse.__doc__ = f.__doc__
    return staticmethod(text_analyse)
def shebang_matches(text, regex):
    r"""Check if the given regular expression matches the last part of the
    shebang if one exists.

        >>> from pygments.util import shebang_matches
        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
        True
        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
        False
        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
        ...                 r'python(2\.\d)?')
        True

    It also checks for common windows executable file extensions::

        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
        True

    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
    the same as ``'perl -e'``)

    Note that this method automatically searches the whole string (eg:
    the regular expression is wrapped in ``'^$'``)
    """
    # Only the first line can hold a shebang; it is lower-cased and the
    # pattern is compiled with IGNORECASE, so matching ignores case.
    first_line = text.partition('\n')[0].lower()
    if not first_line.startswith('#!'):
        return False
    # Split the command into path components and arguments and drop options
    # such as ``-f``/``--foo``; the last remaining piece is what gets matched.
    candidates = [part for part in split_path_re.split(first_line[2:].strip())
                  if part and not part.startswith('-')]
    if not candidates:
        return False
    # The non-capturing group keeps a top-level alternation in `regex`
    # (``a|b``) inside the ``^...$`` anchors; without it only the first
    # alternative would be anchored at the start and only the last at the end.
    pattern = re.compile(r'^(?:%s)(\.(exe|cmd|bat|bin))?$' % regex,
                         re.IGNORECASE)
    return pattern.search(candidates[-1]) is not None
def doctype_matches(text, regex):
    """Check if the doctype matches a regular expression (if present).

    Returns False if `text` contains no DOCTYPE declaration.  Otherwise
    `regex` is matched case-insensitively against the start of the captured
    doctype.

    Note that this method only checks the first part of a DOCTYPE.
    eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
    """
    found = doctype_lookup_re.search(text)
    if found is None:
        return False
    doctype = found.group(1).strip()
    return re.compile(regex, re.I).match(doctype) is not None
def html_doctype_matches(text):
    """Check if the file looks like it has a html doctype."""
    return doctype_matches(text, r'html')
# Maps hash(text) -> bool result of the DOCTYPE/tag heuristics in
# looks_like_xml().
_looks_like_xml_cache = {}


def looks_like_xml(text):
    """Check if a doctype exists or if we have some tags.

    Returns True if `text` starts with an XML declaration, contains a
    DOCTYPE declaration, or has something that looks like a tag pair within
    its first 1000 characters.  The result of the DOCTYPE/tag search is
    cached under the hash of `text`.
    """
    # Anchored match at the start of the text; answered without the cache.
    if xml_decl_re.match(text):
        return True
    key = hash(text)
    try:
        return _looks_like_xml_cache[key]
    except KeyError:
        pass
    # The tag search only runs when no DOCTYPE is found, and only on the
    # first 1000 characters.
    found = (doctype_lookup_re.search(text) is not None
             or tag_re.search(text[:1000]) is not None)
    _looks_like_xml_cache[key] = found
    return found
def surrogatepair(c):
    """Given a unicode character code with length greater than 16 bits,
    return the two 16 bit surrogate pair.

    The result is a ``(high, low)`` tuple of integers.
    """
    # From example D28 of:
    # http://www.unicode.org/book/ch03.pdf
    # 0xd7c0 is 0xd800 - (0x10000 >> 10): it folds the subtraction of the
    # 0x10000 offset into the high-surrogate base.
    high = 0xd7c0 + (c >> 10)
    low = 0xdc00 + (c & 0x3ff)
    return (high, low)
def format_lines(var_name, seq, raw=False, indent_level=0):
    """Formats a sequence of strings for output.

    Builds the source text of an assignment ``var_name = ( ... )`` with one
    item of `seq` per line, each followed by a comma.  With `raw` set the
    items are emitted verbatim; otherwise each item is written as a
    single-quoted string literal.  `indent_level` is the number of 4-space
    indentation steps applied to the whole statement.
    """
    base_indent = ' ' * indent_level * 4
    inner_indent = ' ' * (indent_level + 1) * 4
    lines = [base_indent + var_name + ' = (']
    if raw:
        # These should be preformatted reprs of, say, tuples.
        lines.extend(inner_indent + item + ',' for item in seq)
    else:
        for item in seq:
            # Force use of single quotes: with a double quote appended,
            # repr() has to pick single quotes; the appended character is
            # then cut out again while keeping the closing quote.
            quoted = repr(item + '"')
            lines.append(inner_indent + quoted[:-2] + quoted[-1] + ',')
    lines.append(base_indent + ')')
    return '\n'.join(lines)
def duplicates_removed(it, already_seen=()):
    """
    Returns a list with duplicates removed from the iterable `it`.

    Items contained in `already_seen` are left out as well.

    Order is preserved.
    """
    unique = []
    seen = set()
    for item in it:
        if item not in seen and item not in already_seen:
            unique.append(item)
            seen.add(item)
    return unique
class Future:
    """Generic class to defer some work.

    Handled specially in RegexLexerMeta, to support regex string construction at
    first use.
    """

    def get(self):
        """Return the deferred value; subclasses must override this."""
        raise NotImplementedError
def guess_decode(text):
    """Decode *text* with guessed encoding.

    First try UTF-8; this should fail for non-UTF-8 encodings.
    Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    try:
        return text.decode('utf-8'), 'utf-8'
    except UnicodeDecodeError:
        pass
    try:
        import locale
        prefencoding = locale.getpreferredencoding()
        # Decode with the locale encoding explicitly: a bare ``decode()``
        # would just retry UTF-8, which has already failed above.
        return text.decode(prefencoding), prefencoding
    except (UnicodeDecodeError, LookupError):
        # LookupError covers a locale encoding name Python has no codec for.
        return text.decode('latin1'), 'latin1'
def guess_decode_from_terminal(text, term):
    """Decode *text* coming from terminal *term*.

    First try the terminal encoding, if given.
    Then try UTF-8.  Then try the preferred locale encoding.
    Fall back to latin-1, which always works.

    Returns a ``(decoded_text, encoding_name)`` tuple.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        try:
            decoded = text.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # Wrong or unknown terminal encoding (LookupError: no such
            # codec): fall through to the generic guessing below.
            pass
        else:
            return decoded, encoding
    return guess_decode(text)
def terminal_encoding(term):
    """Return our best guess of encoding for the given *term*.

    This is the `encoding` attribute of *term* if it has a non-empty one,
    else the preferred locale encoding.
    """
    encoding = getattr(term, 'encoding', None)
    if encoding:
        return encoding
    import locale
    return locale.getpreferredencoding()
class UnclosingTextIOWrapper(TextIOWrapper):
    """A `TextIOWrapper` that leaves its underlying buffer open.

    `close()` only flushes pending output, so the wrapped buffer is not
    closed when the wrapper is closed or destroyed.
    """

    # Don't close underlying buffer on destruction.
    def close(self):
        self.flush()