Miras1984

Initial commit

f770010 almost 2 years ago

10.2 kB

	"""
	pygments.util
	~~~~~~~~~~~~~

	Utility functions.

	:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
	:license: BSD, see LICENSE for details.
	"""

	import re
	from io import TextIOWrapper


	# Characters that separate path components and arguments in a shebang line.
	split_path_re = re.compile(r'[/\\ ]')
	# Captures the leading part of a DOCTYPE declaration: the root name and,
	# optionally, a keyword followed by a double-quoted identifier.
	doctype_lookup_re = re.compile(r'''
	    <!DOCTYPE\s+(
	     [a-zA-Z_][a-zA-Z0-9]*
	     (?: \s+      # optional in HTML5
	     [a-zA-Z_][a-zA-Z0-9]*\s+
	     "[^"]*")?
	     )
	     [^>]*>
	''', re.DOTALL | re.MULTILINE | re.VERBOSE)
	# An opening tag (optionally with attributes), arbitrary content, and a
	# closing tag.  The lazy ``*?`` quantifiers are required so that tags with
	# attributes and with more than one character of content are recognised.
	tag_re = re.compile(r'<(.+?)(\s.*?)?>.*?</.+?>',
	                    re.IGNORECASE | re.DOTALL | re.MULTILINE)
	# An XML declaration, optionally preceded by whitespace.
	xml_decl_re = re.compile(r'\s*<\?xml[^>]*\?>', re.I)


	class ClassNotFound(ValueError):
	    """Signals that a lookup function could not find a matching class."""


	class OptionError(Exception):
	    """
	    Error raised by the option processing functions when an argument has
	    the wrong type or an unacceptable value.
	    """

	def get_choice_opt(options, optname, allowed, default=None, normcase=False):
	    """
	    Return the value of option `optname` after checking it against `allowed`.

	    The value is read with ``options.get(optname, default)``.  If `normcase`
	    is true the value is lower-cased before the check.  A value without a
	    ``lower()`` method (for instance a missing option whose default is
	    ``None``) is validated unchanged instead of failing with an
	    ``AttributeError``.

	    :param options: dictionary mapping option names to values.
	    :param optname: key to look up in `options`.
	    :param allowed: container of acceptable values.
	    :param default: value used when `optname` is absent.
	    :param normcase: lower-case the value before validating it.
	    :return: the (possibly lower-cased) value.
	    :raises OptionError: if the value is not in `allowed`.
	    """
	    value = options.get(optname, default)
	    # Only normalise values that support it; anything else goes straight to
	    # the membership test so a bad value is reported as an OptionError.
	    if normcase and hasattr(value, 'lower'):
	        value = value.lower()
	    if value not in allowed:
	        raise OptionError('Value for option %s must be one of %s' %
	                          (optname, ', '.join(map(str, allowed))))
	    return value


	def get_bool_opt(options, optname, default=None):
	    """
	    Read option `optname` from `options` and interpret it as a Boolean.

	    The lookup is ``options.get(optname, default)``, so `default` is used
	    when the key is absent.  Booleans are returned as they are and integers
	    are converted with ``bool()``.  Strings are accepted so that values
	    coming from the command line work too: ``1``, ``yes``, ``true`` and
	    ``on`` mean ``True``; ``0``, ``no``, ``false`` and ``off`` mean
	    ``False`` (compared case-insensitively).

	    Any other type, or a string outside those two sets, raises an
	    `OptionError`.
	    """
	    value = options.get(optname, default)
	    # bool is tested first because it is a subclass of int.
	    if isinstance(value, bool):
	        return value
	    if isinstance(value, int):
	        return bool(value)
	    if not isinstance(value, str):
	        raise OptionError('Invalid type %r for option %s; use '
	                          '1/0, yes/no, true/false, on/off' % (
	                              value, optname))
	    lowered = value.lower()
	    if lowered in ('1', 'yes', 'true', 'on'):
	        return True
	    if lowered in ('0', 'no', 'false', 'off'):
	        return False
	    raise OptionError('Invalid value %r for option %s; use '
	                      '1/0, yes/no, true/false, on/off' % (
	                          value, optname))


	def get_int_opt(options, optname, default=None):
	    """
	    Read option `optname` (falling back to `default`) and convert it with
	    ``int()``.

	    An `OptionError` is raised when ``int()`` rejects the value, either
	    because of its type or because of its content.
	    """
	    raw = options.get(optname, default)
	    try:
	        number = int(raw)
	    except TypeError:
	        raise OptionError('Invalid type %r for option %s; you '
	                          'must give an integer value' % (
	                              raw, optname))
	    except ValueError:
	        raise OptionError('Invalid value %r for option %s; you '
	                          'must give an integer value' % (
	                              raw, optname))
	    else:
	        return number

	def get_list_opt(options, optname, default=None):
	    """
	    Read option `optname` (falling back to `default`) as a list.

	    A string is split at whitespace; a list or tuple is copied into a new
	    list.  Every other type raises an `OptionError`.
	    """
	    value = options.get(optname, default)
	    if isinstance(value, (list, tuple)):
	        return list(value)
	    if isinstance(value, str):
	        return value.split()
	    raise OptionError('Invalid type %r for option %s; you '
	                      'must give a list value' % (
	                          value, optname))


	def docstring_headline(obj):
	    """
	    Return the first paragraph of ``obj.__doc__`` as a single line.

	    The leading non-blank lines of the docstring are stripped and joined
	    with single spaces; reading stops at the first blank line.  An empty
	    string is returned when `obj` has no docstring.
	    """
	    doc = obj.__doc__
	    if not doc:
	        return ''
	    headline = []
	    for raw_line in doc.strip().splitlines():
	        stripped = raw_line.strip()
	        if not stripped:
	            # A blank line ends the headline paragraph.
	            break
	        headline.append(stripped)
	    return ' '.join(headline)


	def make_analysator(f):
	    """
	    Wrap `f` in a static method that always returns a float in [0.0, 1.0].

	    The wrapper returns ``0.0`` when `f` raises, when its result is falsy,
	    or when the result cannot be converted with ``float()``; otherwise the
	    converted result is clamped to the range 0.0 to 1.0.  The wrapper
	    takes over the docstring of `f`.
	    """
	    def text_analyse(text):
	        try:
	            raw_score = f(text)
	        except Exception:
	            # A failing analyser counts as "no match".
	            return 0.0
	        if raw_score:
	            try:
	                return min(1.0, max(0.0, float(raw_score)))
	            except (ValueError, TypeError):
	                pass
	        return 0.0

	    text_analyse.__doc__ = f.__doc__
	    return staticmethod(text_analyse)


	def shebang_matches(text, regex):
	    r"""Check if the given regular expression matches the last part of the
	    shebang if one exists.

	        >>> from pygments.util import shebang_matches
	        >>> shebang_matches('#!/usr/bin/env python', r'python(2\.\d)?')
	        True
	        >>> shebang_matches('#!/usr/bin/python2.4', r'python(2\.\d)?')
	        True
	        >>> shebang_matches('#!/usr/bin/python-ruby', r'python(2\.\d)?')
	        False
	        >>> shebang_matches('#!/usr/bin/python/ruby', r'python(2\.\d)?')
	        False
	        >>> shebang_matches('#!/usr/bin/startsomethingwith python',
	        ...                 r'python(2\.\d)?')
	        True

	    It also checks for common windows executable file extensions::

	        >>> shebang_matches('#!C:\\Python2.4\\Python.exe', r'python(2\.\d)?')
	        True

	    Parameters (``'-f'`` or ``'--foo'`` are ignored so ``'perl'`` does
	    the same as ``'perl -e'``)

	    Note that this method automatically searches the whole string (eg:
	    the regular expression is wrapped in ``'^$'``)

	    :param text: the source text; only its first line is inspected.
	    :param regex: pattern source that the interpreter name must match.
	    :return: ``True`` if the first line is a shebang whose last
	        non-option component matches `regex`, else ``False``.
	    """
	    # Only the first line can hold a shebang; compare case-insensitively.
	    newline_pos = text.find('\n')
	    if newline_pos >= 0:
	        first_line = text[:newline_pos].lower()
	    else:
	        first_line = text.lower()
	    if not first_line.startswith('#!'):
	        return False
	    # Split at path separators and spaces, drop empty pieces and options
	    # (anything starting with '-'); the last remaining piece is the
	    # interpreter name.
	    candidates = [part for part in split_path_re.split(first_line[2:].strip())
	                  if part and not part.startswith('-')]
	    if not candidates:
	        return False
	    # The alternation must use a plain ``|``: an escaped ``\|`` would match
	    # a literal pipe and never accept the ``.exe``/``.cmd``/``.bat``/``.bin``
	    # suffixes.
	    pattern = re.compile(r'^%s(\.(exe|cmd|bat|bin))?$' % regex, re.IGNORECASE)
	    return pattern.search(candidates[-1]) is not None


	def doctype_matches(text, regex):
	    """Tell whether the DOCTYPE found in `text` matches `regex`.

	    Returns ``False`` when `text` contains no DOCTYPE.  Only the first part
	    of the declaration is compared (case-insensitively, anchored at its
	    start), eg: 'html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"'
	    """
	    found = doctype_lookup_re.search(text)
	    if found is None:
	        return False
	    declared = found.group(1).strip()
	    pattern = re.compile(regex, re.I)
	    return pattern.match(declared) is not None


	def html_doctype_matches(text):
	    """Tell whether `text` carries a DOCTYPE that starts with ``html``."""
	    is_html = doctype_matches(text, r'html')
	    return is_html


	# Results of the tag search in looks_like_xml(), keyed by hash(text).
	_looks_like_xml_cache = {}


	def looks_like_xml(text):
	    """Guess whether `text` is XML.

	    Returns ``True`` if `text` starts with an XML declaration or contains
	    a DOCTYPE.  Otherwise the first 1000 characters are searched for a tag
	    pair; that result is cached under ``hash(text)`` and returned.
	    """
	    if xml_decl_re.match(text):
	        return True
	    cache_key = hash(text)
	    if cache_key in _looks_like_xml_cache:
	        return _looks_like_xml_cache[cache_key]
	    # A DOCTYPE hit is returned directly and is not cached.
	    if doctype_lookup_re.search(text) is not None:
	        return True
	    has_tags = tag_re.search(text[:1000]) is not None
	    _looks_like_xml_cache[cache_key] = has_tags
	    return has_tags


	def surrogatepair(c):
	    """Split a character code wider than 16 bits into its two 16 bit
	    surrogates, returned as a ``(high, low)`` tuple.
	    """
	    # From example D28 of:
	    # http://www.unicode.org/book/ch03.pdf
	    high = 0xd7c0 + (c >> 10)
	    low = 0xdc00 + (c & 0x3ff)
	    return (high, low)


	def format_lines(var_name, seq, raw=False, indent_level=0):
	    """Render `seq` as the source text of a tuple assignment.

	    The result is ``var_name = (`` followed by one element per line and a
	    closing ``)``.  `indent_level` counts steps of four spaces; elements
	    are indented one step further than the assignment.  With `raw` set the
	    elements are inserted verbatim, otherwise each string is written as a
	    single-quoted literal.
	    """
	    outer = ' ' * indent_level * 4
	    inner = ' ' * (indent_level + 1) * 4
	    header = outer + var_name + ' = ('
	    if raw:
	        # These should be preformatted reprs of, say, tuples.
	        body = [inner + item + ',' for item in seq]
	    else:
	        body = []
	        for item in seq:
	            # Appending a double quote makes repr() pick single quotes;
	            # the extra character is cut out of the result again.
	            quoted = repr(item + '"')
	            body.append(inner + quoted[:-2] + quoted[-1] + ',')
	    return '\n'.join([header, *body, outer + ')'])


	def duplicates_removed(it, already_seen=()):
	    """
	    Return the items of the iterable `it` as a list, keeping only the first
	    occurrence of each item and skipping anything in `already_seen`.

	    The original order is kept.
	    """
	    unique = []
	    emitted = set()
	    for item in it:
	        if item not in emitted and item not in already_seen:
	            unique.append(item)
	            emitted.add(item)
	    return unique


	class Future:
	    """Generic base for work that is deferred until later.

	    RegexLexerMeta handles instances specially, so that regex strings can
	    be constructed at first use.
	    """

	    def get(self):
	        """Not implemented in this base class."""
	        raise NotImplementedError()


	def guess_decode(text):
	    """Decode *text* with a guessed encoding.

	    First try UTF-8; this should fail for non-UTF-8 encodings.
	    Then try the preferred locale encoding.
	    Fall back to latin-1, which always works.

	    :param text: the bytes to decode.
	    :return: a ``(decoded_text, encoding_name)`` tuple, where
	        `encoding_name` is the encoding that was actually used.
	    """
	    try:
	        text = text.decode('utf-8')
	        return text, 'utf-8'
	    except UnicodeDecodeError:
	        try:
	            import locale
	            prefencoding = locale.getpreferredencoding()
	            # Pass the locale encoding explicitly: a bare ``decode()``
	            # defaults to UTF-8, which has already failed above, so the
	            # locale encoding would never actually be tried.
	            text = text.decode(prefencoding)
	            return text, prefencoding
	        except (UnicodeDecodeError, LookupError):
	            # latin-1 maps every byte value, so this step cannot fail.
	            text = text.decode('latin1')
	            return text, 'latin1'


	def guess_decode_from_terminal(text, term):
	    """Decode *text* that was read from the terminal *term*.

	    The terminal's own encoding is tried first, if it reports one.  When
	    that is missing or cannot decode the text, the result of
	    ``guess_decode(text)`` is returned instead.  Either way the return
	    value is a ``(decoded_text, encoding_name)`` tuple.
	    """
	    term_encoding = getattr(term, 'encoding', None)
	    if term_encoding:
	        try:
	            return text.decode(term_encoding), term_encoding
	        except UnicodeDecodeError:
	            # Fall through to the generic guessing below.
	            pass
	    return guess_decode(text)


	def terminal_encoding(term):
	    """Return the encoding to assume for the terminal *term*.

	    This is ``term.encoding`` when that attribute exists and is truthy,
	    and the preferred locale encoding otherwise.
	    """
	    encoding = getattr(term, 'encoding', None)
	    if not encoding:
	        import locale
	        encoding = locale.getpreferredencoding()
	    return encoding


	class UnclosingTextIOWrapper(TextIOWrapper):
	    """A TextIOWrapper whose ``close()`` only flushes."""

	    def close(self):
	        # Don't close the underlying buffer, not even on destruction;
	        # flushing pending output is all that happens here.
	        self.flush()