GameServerZ

Sleeping

App Files Files Community

GameServerZ / MLPY /Lib /site-packages /markdown /treeprocessors.py

Kano001

Upload 376 files

122d3ff verified over 1 year ago

raw

history blame

17.7 kB

	# Python Markdown

	# A Python implementation of John Gruber's Markdown.

	# Documentation: https://python-markdown.github.io/
	# GitHub: https://github.com/Python-Markdown/markdown/
	# PyPI: https://pypi.org/project/Markdown/

	# Started by Manfred Stienstra (http://www.dwerg.net/).
	# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
	# Currently maintained by Waylan Limberg (https://github.com/waylan),
	# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

	# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
	# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
	# Copyright 2004 Manfred Stienstra (the original version)

	# License: BSD (see LICENSE.md for details).

	"""
	Tree processors manipulate the tree created by block processors. They can even create an entirely
	new `ElementTree` object. This is an excellent place for creating summaries, adding collected
	references, or making last-minute adjustments.

	"""

	from __future__ import annotations

	import re
	import xml.etree.ElementTree as etree
	from typing import TYPE_CHECKING, Any
	from . import util
	from . import inlinepatterns

	if TYPE_CHECKING: # pragma: no cover
	from markdown import Markdown


	def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
	    """
	    Build the default `treeprocessors` for Markdown.

	    Arguments:
	        md: The `Markdown` instance handed to each processor's constructor.
	        **kwargs: Accepted but not used by this function.

	    Returns:
	        A `util.Registry` holding the three default tree processors.
	    """
	    # (processor class, registry name, priority), listed in registration order.
	    defaults = (
	        (InlineProcessor, 'inline', 20),
	        (PrettifyTreeprocessor, 'prettify', 10),
	        (UnescapeTreeprocessor, 'unescape', 0),
	    )
	    registry = util.Registry()
	    for processor_class, name, priority in defaults:
	        registry.register(processor_class(md), name, priority)
	    return registry


	def isString(s: object) -> bool:
	    """
	    Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString].

	    Any object that is an instance of `util.AtomicString` yields `False`;
	    otherwise the result is simply whether the object is a `str`.
	    """
	    return not isinstance(s, util.AtomicString) and isinstance(s, str)


	class Treeprocessor(util.Processor):
	    """
	    `Treeprocessor`s are run on the `ElementTree` object before serialization.

	    Each `Treeprocessor` implements a `run` method that takes a pointer to an
	    `Element` and modifies it as necessary.

	    `Treeprocessors` must extend `markdown.Treeprocessor`.

	    """
	    def run(self, root: etree.Element) -> etree.Element | None:
	        """
	        Subclasses of `Treeprocessor` should implement a `run` method, which
	        takes a root `Element`. This method can return another `Element`
	        object, and the existing root `Element` will be replaced, or it can
	        modify the current tree and return `None`.

	        Arguments:
	            root: The root `Element` of the tree to process.

	        Returns:
	            A replacement root `Element`, or `None` when the tree was
	            modified in place. This base implementation does nothing and
	            returns `None`.
	        """
	        pass  # pragma: no cover


	class InlineProcessor(Treeprocessor):
	    """
	    A `Treeprocessor` that traverses a tree, applying inline patterns.

	    Nodes produced by the inline patterns are kept in `self.stashed_nodes`
	    and referenced from the text through placeholder strings. Once a piece
	    of text has been run through all patterns, the placeholders are
	    resolved back into the stashed nodes, which are then inserted into
	    the tree.
	    """

	    def __init__(self, md: Markdown):
	        """
	        Store the placeholder settings and the inline patterns of `md`.

	        Arguments:
	            md: The `Markdown` instance whose `inlinePatterns` are applied.
	        """
	        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
	        self.__placeholder_suffix = util.ETX
	        # Not read anywhere else in this class; kept so the set of instance
	        # attributes stays unchanged.
	        self.__placeholder_length = (
	            4 + len(self.__placeholder_prefix) + len(self.__placeholder_suffix)
	        )
	        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
	        self.md = md
	        self.inlinePatterns = md.inlinePatterns
	        # Lower-cased tag names of the elements enclosing the text currently
	        # being processed.
	        self.ancestors: list[str] = []

	    def __makePlaceholder(self, type: str) -> tuple[str, str]:
	        """
	        Generate a placeholder.

	        Arguments:
	            type: Unused; kept so the signature stays unchanged.

	        Returns:
	            A tuple of the placeholder string and the zero-padded id it
	            was built from. The id is the current size of the stash.
	        """
	        node_id = f"{len(self.stashed_nodes):04d}"
	        placeholder = util.INLINE_PLACEHOLDER % node_id
	        return placeholder, node_id

	    def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
	        """
	        Extract id from data string, start from index.

	        Arguments:
	            data: String.
	            index: Index, from which we start search.

	        Returns:
	            Placeholder id and string index, after the found placeholder.
	            When nothing matches, the id is `None` and the index is
	            `index + 1`.

	        """
	        m = self.__placeholder_re.search(data, index)
	        if m:
	            return m.group(1), m.end()
	        return None, index + 1

	    def __stashNode(self, node: etree.Element | str, type: str) -> str:
	        """
	        Add node to stash.

	        Arguments:
	            node: The element or string to store.
	            type: Passed through to the placeholder generator.

	        Returns:
	            The placeholder string that now stands for `node`.
	        """
	        placeholder, node_id = self.__makePlaceholder(type)
	        self.stashed_nodes[node_id] = node
	        return placeholder

	    def __handleInline(self, data: str, patternIndex: int = 0) -> str:
	        """
	        Process string with inline patterns and replace it with placeholders.

	        Arguments:
	            data: A line of Markdown text.
	            patternIndex: The index of the `inlinePattern` to start with.

	        Returns:
	            String with placeholders. An `AtomicString` is returned
	            untouched.

	        """
	        if not isinstance(data, util.AtomicString):
	            startIndex = 0
	            count = len(self.inlinePatterns)
	            while patternIndex < count:
	                data, matched, startIndex = self.__applyPattern(
	                    self.inlinePatterns[patternIndex], data, patternIndex, startIndex
	                )
	                # Stay on the same pattern for as long as it keeps matching.
	                if not matched:
	                    patternIndex += 1
	        return data

	    def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
	        """
	        Process placeholders in `Element.text` or `Element.tail`
	        of Elements popped from `self.stashed_nodes`.

	        Arguments:
	            node: Parent node.
	            subnode: Processing node.
	            isText: Boolean variable, True - it's text, False - it's a tail.

	        """
	        # Detach the string first; resolving the placeholders writes the
	        # plain-text pieces back onto `subnode`.
	        if isText:
	            text = subnode.text
	            subnode.text = None
	        else:
	            text = subnode.tail
	            subnode.tail = None

	        childResult = self.__processPlaceholders(text, subnode, isText)

	        # Elements found in a tail go right after `subnode` inside `node`;
	        # everything else goes to the front of `node`.
	        if not isText and node is not subnode:
	            pos = list(node).index(subnode) + 1
	        else:
	            pos = 0

	        # Inserting in reverse at a fixed position keeps the original order.
	        childResult.reverse()
	        for newChild in childResult:
	            node.insert(pos, newChild[0])

	    def __processPlaceholders(
	        self,
	        data: str | None,
	        parent: etree.Element,
	        isText: bool = True
	    ) -> list[tuple[etree.Element, list[str]]]:
	        """
	        Process string with placeholders and generate `ElementTree` tree.

	        Arguments:
	            data: String with placeholders instead of `ElementTree` elements.
	            parent: Element, which contains processing inline data.
	            isText: Boolean variable, True - it's text, False - it's a tail.

	        Returns:
	            List with `ElementTree` elements with applied inline patterns.
	            Each entry pairs an element with a copy of `self.ancestors`.

	        """
	        def linkText(text: str | None) -> None:
	            """Append `text` after the last result, or onto `parent`."""
	            if text:
	                if result:
	                    if result[-1][0].tail:
	                        result[-1][0].tail += text
	                    else:
	                        result[-1][0].tail = text
	                elif not isText:
	                    if parent.tail:
	                        parent.tail += text
	                    else:
	                        parent.tail = text
	                else:
	                    if parent.text:
	                        parent.text += text
	                    else:
	                        parent.text = text

	        result = []
	        startIndex = 0
	        while data:
	            index = data.find(self.__placeholder_prefix, startIndex)
	            if index != -1:
	                node_id, phEndIndex = self.__findPlaceholder(data, index)

	                if node_id in self.stashed_nodes:
	                    node = self.stashed_nodes[node_id]

	                    if index > 0:
	                        text = data[startIndex:index]
	                        linkText(text)

	                    if not isinstance(node, str):  # it's Element
	                        for child in [node] + list(node):
	                            if child.tail:
	                                if child.tail.strip():
	                                    self.__processElementText(
	                                        node, child, False
	                                    )
	                            if child.text:
	                                if child.text.strip():
	                                    self.__processElementText(child, child)
	                    else:  # it's just a string
	                        linkText(node)
	                        startIndex = phEndIndex
	                        continue

	                    startIndex = phEndIndex
	                    result.append((node, self.ancestors[:]))

	                else:  # wrong placeholder
	                    # Keep the prefix as literal text and resume after it.
	                    end = index + len(self.__placeholder_prefix)
	                    linkText(data[startIndex:end])
	                    startIndex = end
	            else:
	                text = data[startIndex:]
	                if isinstance(data, util.AtomicString):
	                    # We don't want to lose the `AtomicString`
	                    text = util.AtomicString(text)
	                linkText(text)
	                data = ""

	        return result

	    def __applyPattern(
	        self,
	        pattern: inlinepatterns.Pattern,
	        data: str,
	        patternIndex: int,
	        startIndex: int = 0
	    ) -> tuple[str, bool, int]:
	        """
	        Check if the line fits the pattern, create the necessary
	        elements, add it to `stashed_nodes`.

	        Arguments:
	            data: The text to be processed.
	            pattern: The pattern to be checked.
	            patternIndex: Index of current pattern.
	            startIndex: String index, from which we start searching.

	        Returns:
	            A tuple `(data, matched, next_start)`:

	            * `(data, False, 0)` when the pattern is excluded by an
	              ancestor or nothing matched;
	            * `(data, True, end)` when the pattern matched but produced
	              no node, where `end` is the end of the match;
	            * `(new_data, True, 0)` when a node was stashed and the
	              matched text was replaced with its placeholder.

	        """
	        new_style = isinstance(pattern, inlinepatterns.InlineProcessor)

	        for exclude in pattern.ANCESTOR_EXCLUDES:
	            if exclude.lower() in self.ancestors:
	                return data, False, 0

	        if new_style:
	            match = None
	            # Since `handleMatch` may reject our first match,
	            # we iterate over the buffer looking for matches
	            # until we can't find any more.
	            for match in pattern.getCompiledRegExp().finditer(data, startIndex):
	                node, start, end = pattern.handleMatch(match, data)
	                if start is None or end is None:
	                    # Rejected: forget this match and try the next one.
	                    match = None
	                    continue
	                break
	        else:  # pragma: no cover
	            match = pattern.getCompiledRegExp().match(data[startIndex:])
	            leftData = data[:startIndex]

	        if not match:
	            return data, False, 0

	        if not new_style:  # pragma: no cover
	            node = pattern.handleMatch(match)
	            start = match.start(0)
	            end = match.end(0)

	        if node is None:
	            return data, True, end

	        if not isinstance(node, str):
	            if not isinstance(node.text, util.AtomicString):
	                # We need to process current node too
	                for child in [node] + list(node):
	                    if child.text:
	                        # Text inside the child is handled by the patterns
	                        # that follow the current one, with the child's
	                        # tag counted as an ancestor.
	                        self.ancestors.append(child.tag.lower())
	                        child.text = self.__handleInline(
	                            child.text, patternIndex + 1
	                        )
	                        self.ancestors.pop()
	                    if child.tail:
	                        child.tail = self.__handleInline(
	                            child.tail, patternIndex
	                        )

	        placeholder = self.__stashNode(node, pattern.type())

	        if new_style:
	            return f"{data[:start]}{placeholder}{data[end:]}", True, 0
	        else:  # pragma: no cover
	            return (
	                f"{leftData}{match.group(1)}{placeholder}{match.groups()[-1]}",
	                True,
	                0,
	            )

	    def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
	        """
	        Build the ancestor list.

	        Walks from `parent` upwards through `self.parent_map`, collecting
	        lower-cased tag names (including that of `parent` itself), and
	        appends them to `parents` outermost first.

	        Arguments:
	            parent: Element to start from, or `None` for no ancestors.
	            parents: List that is extended in place.
	        """
	        ancestors = []
	        while parent is not None:
	            ancestors.append(parent.tag.lower())
	            parent = self.parent_map.get(parent)
	        ancestors.reverse()
	        parents.extend(ancestors)

	    def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
	        """Apply inline patterns to a parsed Markdown tree.

	        Iterate over `Element`, find elements with inline tag, apply inline
	        patterns and append newly created Elements to tree. To avoid further
	        processing of string with inline patterns, instead of normal string,
	        use subclass [`AtomicString`][markdown.util.AtomicString]:

	            node.text = markdown.util.AtomicString("This will not be processed.")

	        Arguments:
	            tree: `Element` object, representing Markdown tree.
	            ancestors: List of parent tag names that precede the tree node (if needed).

	        Returns:
	            An element tree object with applied inline patterns.

	        """
	        self.stashed_nodes: dict[str, etree.Element | str] = {}

	        # Ensure a valid parent list, but copy passed in lists
	        # to ensure we don't have the user accidentally change it on us.
	        tree_parents = [] if ancestors is None else ancestors[:]

	        self.parent_map = {c: p for p in tree.iter() for c in p}
	        stack = [(tree, tree_parents)]

	        while stack:
	            currElement, parents = stack.pop()

	            self.ancestors = parents
	            self.__build_ancestors(currElement, self.ancestors)

	            # Elements created from a child's text are inserted only after
	            # the loop over the children has finished.
	            insertQueue = []
	            for child in currElement:
	                if child.text and not isinstance(
	                    child.text, util.AtomicString
	                ):
	                    self.ancestors.append(child.tag.lower())
	                    text = child.text
	                    child.text = None
	                    lst = self.__processPlaceholders(
	                        self.__handleInline(text), child
	                    )
	                    for item in lst:
	                        self.parent_map[item[0]] = child
	                    stack += lst
	                    insertQueue.append((child, lst))
	                    self.ancestors.pop()
	                if child.tail:
	                    tail = self.__handleInline(child.tail)
	                    # Throw-away element that collects the plain-text part
	                    # of the processed tail.
	                    dumby = etree.Element('d')
	                    child.tail = None
	                    tailResult = self.__processPlaceholders(tail, dumby, False)
	                    if dumby.tail:
	                        child.tail = dumby.tail
	                    pos = list(currElement).index(child) + 1
	                    # Reverse so that inserting at a fixed position keeps
	                    # the elements in their original order.
	                    tailResult.reverse()
	                    for newChild in tailResult:
	                        self.parent_map[newChild[0]] = currElement
	                        currElement.insert(pos, newChild[0])
	                if len(child):
	                    self.parent_map[child] = currElement
	                    stack.append((child, self.ancestors[:]))

	            for element, lst in insertQueue:
	                for i, obj in enumerate(lst):
	                    newChild = obj[0]
	                    element.insert(i, newChild)
	        return tree


	class PrettifyTreeprocessor(Treeprocessor):
	    """ Add line breaks to the html document. """

	    def _prettifyETree(self, elem: etree.Element) -> None:
	        """
	        Recursively add line breaks to `ElementTree` children.

	        Arguments:
	            elem: The element whose `text` and `tail` may be set to a
	                newline; block-level children are visited recursively.
	        """
	        newline = "\n"
	        descend = self.md.is_block_level(elem.tag) and elem.tag not in ('code', 'pre')
	        if descend:
	            blank_text = not (elem.text and elem.text.strip())
	            # Only break after the opening tag when the first child is
	            # itself block level.
	            if blank_text and len(elem) and self.md.is_block_level(elem[0].tag):
	                elem.text = newline
	            for child in elem:
	                if self.md.is_block_level(child.tag):
	                    self._prettifyETree(child)
	        if not (elem.tail and elem.tail.strip()):
	            elem.tail = newline

	    def run(self, root: etree.Element) -> None:
	        """
	        Add line breaks to `Element` object and its children.

	        Arguments:
	            root: Root element of the tree; it is modified in place.
	        """
	        self._prettifyETree(root)

	        # Do `<br />`'s separately as they are often in the middle of
	        # inline content and missed by `_prettifyETree`.
	        for line_break in root.iter('br'):
	            if line_break.tail and line_break.tail.strip():
	                line_break.tail = '\n%s' % line_break.tail
	            else:
	                line_break.tail = '\n'

	        # Clean up extra empty lines at end of code blocks.
	        for pre in root.iter('pre'):
	            if not len(pre) or pre[0].tag != 'code':
	                continue
	            code = pre[0]
	            # Only prettify code containing text only
	            if len(code) or code.text is None:
	                continue
	            code.text = util.AtomicString(code.text.rstrip() + '\n')


	class UnescapeTreeprocessor(Treeprocessor):
	    """ Restore escaped chars """

	    # A run of digits enclosed between `util.STX` and `util.ETX`.
	    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

	    def _unescape(self, m: re.Match[str]) -> str:
	        """ Return the character whose code point is the captured number. """
	        code_point = int(m.group(1))
	        return chr(code_point)

	    def unescape(self, text: str) -> str:
	        """ Return `text` with every match of `RE` replaced by its character. """
	        return self.RE.sub(self._unescape, text)

	    def run(self, root: etree.Element) -> None:
	        """
	        Loop over all elements and unescape all text.

	        The `text` of elements tagged `code` is left as is; tails and
	        attribute values are unescaped for every element.
	        """
	        for node in root.iter():
	            # Unescape text content
	            if node.text and node.tag != 'code':
	                node.text = self.unescape(node.text)
	            # Unescape tail content
	            if node.tail:
	                node.tail = self.unescape(node.tail)
	            # Unescape attribute values
	            for attr_name, attr_value in node.items():
	                restored = self.unescape(attr_value)
	                node.set(attr_name, restored)
	