Spaces:
Running
Running
# Attribute List Extension for Python-Markdown
# ============================================
# Adds attribute list syntax. Inspired by
# [Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
# feature of the same name.
# See https://Python-Markdown.github.io/extensions/attr_list
# for documentation.
# Original code Copyright 2011 [Waylan Limberg](http://achinghead.com/).
# All changes Copyright 2011-2014 The Python Markdown Project
# License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
Adds attribute list syntax. Inspired by
[Maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
feature of the same name.
See the [documentation](https://Python-Markdown.github.io/extensions/attr_list)
for details.
"""
from __future__ import annotations | |
from typing import TYPE_CHECKING | |
from . import Extension | |
from ..treeprocessors import Treeprocessor | |
import re | |
if TYPE_CHECKING: # pragma: no cover | |
from xml.etree.ElementTree import Element | |
def _handle_double_quote(s, t): | |
k, v = t.split('=', 1) | |
return k, v.strip('"') | |
def _handle_single_quote(s, t): | |
k, v = t.split('=', 1) | |
return k, v.strip("'") | |
def _handle_key_value(s, t): | |
return t.split('=', 1) | |
def _handle_word(s, t): | |
if t.startswith('.'): | |
return '.', t[1:] | |
if t.startswith('#'): | |
return 'id', t[1:] | |
return t, t | |
# Tokenizer for the inside of an attribute list. The patterns are listed from
# most to least specific: quoted `key="value"` / `key='value'` pairs come
# before the unquoted `key=value` form, which comes before bare words.
# Single spaces between tokens are consumed without producing a result.
_scanner = re.Scanner(
    [
        (r'[^ =}]+=".*?"', _handle_double_quote),
        (r"[^ =}]+='.*?'", _handle_single_quote),
        (r'[^ =}]+=[^ =}]+', _handle_key_value),
        (r'[^ =}]+', _handle_word),
        (r' ', None),
    ]
)
def get_attrs_and_remainder(attrs_string: str) -> tuple[list[tuple[str, str]], str]:
    """ Tokenize an attribute list string.

    Returns a pair `(attrs, remainder)`. `attrs` is the list of results
    produced by the scanner. `remainder` is the unscanned text starting at the
    first `}` it contains, or an empty string when it contains no `}`. A
    non-empty remainder normally indicates that the input was not a well
    formed attribute list.
    """
    attrs, unparsed = _scanner.scan(attrs_string)
    brace_at = unparsed.find('}')
    if brace_at == -1:
        # Historic behavior: unparsable text without a `}` is silently dropped.
        return attrs, ''
    # Likewise, anything unparsable in front of the `}` is dropped.
    return attrs, unparsed[brace_at:]
def get_attrs(str: str) -> list[tuple[str, str]]:  # pragma: no cover
    """ Soft-deprecated. Prefer `get_attrs_and_remainder`.

    Returns only the parsed attributes; any remainder is discarded.
    """
    attrs, _ = get_attrs_and_remainder(str)
    return attrs
def isheader(elem: Element) -> bool:
    """ Return `True` if the tag of `elem` is one of `h1` through `h6`. """
    return elem.tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
class AttrListTreeprocessor(Treeprocessor):
    """ Move `{: ... }` attribute lists from element text onto the elements.

    For block-level elements the attribute list is looked for at the end of
    the relevant text (see `_locate_block_text`); for all other elements it is
    looked for at the very start of the element's tail.
    """

    # An attribute list: `{` with an optional `:`, the attributes, then `}`.
    # Group 1 captures the attribute text.
    BASE_RE = r'\{\:?[ ]*([^\}\n ][^\n]*)[ ]*\}'
    # Attribute list at the end of the text, preceded by at least one space.
    HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
    # Attribute list alone on the last line of the text.
    BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
    # Attribute list at the very start of the text.
    INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
    # Matches runs of characters that are NOT in the permitted name set.
    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
                         r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
                         r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
                         r'\uf900-\ufdcf\ufdf0-\ufffd'
                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')

    def run(self, doc: Element) -> None:
        """ Walk every element of `doc` and apply any attribute list found. """
        for elem in doc.iter():
            if self.md.is_block_level(elem.tag):
                self._process_block(elem)
            elif elem.tail:
                # inline: check for `attrs` at start of tail
                m = self.INLINE_RE.match(elem.tail)
                if m:
                    remainder = self.assign_attrs(elem, m.group(1))
                    # Drop the matched list but put back any over-matched text.
                    elem.tail = elem.tail[m.end():] + remainder

    def _process_block(self, elem: Element) -> None:
        """ Apply a trailing attribute list to the block-level element `elem`. """
        if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
            # header, def-term, or table cell: check for attributes at end of element
            pattern = self.HEADER_RE
        else:
            # other block level: check for `attrs` on last line of text
            pattern = self.BLOCK_RE

        holder, field = self._locate_block_text(elem)
        if holder is None:
            return
        text = getattr(holder, field)
        m = pattern.search(text)
        if m is None:
            return
        if self.assign_attrs(elem, m.group(1), strict=True):
            # A `}` was left over: not treated as an attribute list, so the
            # text is left untouched and no attributes were assigned.
            return
        text = text[:m.start()]
        if isheader(elem):
            # clean up trailing #s
            text = text.rstrip('#').rstrip()
        setattr(holder, field, text)

    def _locate_block_text(self, elem: Element) -> tuple[Element | None, str]:
        """ Find which piece of text may hold the attribute list of a block.

        Returns `(holder, field)` where `field` is `'text'` or `'tail'` and
        names the non-empty attribute of `holder` to search, or `(None, '')`
        when there is nothing to search.
        """
        if len(elem) and elem.tag == 'li':
            # special case list items. children may include a `ul` or `ol`.
            pos = next(
                (i for i, child in enumerate(elem) if child.tag in ['ul', 'ol']),
                None
            )
            if pos is None and elem[-1].tail:
                # use tail of last child. no `ul` or `ol`.
                return elem[-1], 'tail'
            if pos is not None and pos > 0 and elem[pos-1].tail:
                # use tail of last child before `ul` or `ol`
                return elem[pos-1], 'tail'
            if elem.text:
                # use text. `ul` is first child.
                return elem, 'text'
            return None, ''
        if len(elem) and elem[-1].tail:
            # has children. Get from tail of last child
            return elem[-1], 'tail'
        if elem.text:
            # no children. Get from text.
            return elem, 'text'
        return None, ''

    def assign_attrs(self, elem: Element, attrs_string: str, *, strict: bool = False) -> str:
        """ Assign `attrs` to element.

        If the `attrs_string` has an extra closing curly brace, the remaining text is returned.

        The `strict` argument controls whether to still assign `attrs` if there is a remaining `}`.
        """
        attrs, remainder = get_attrs_and_remainder(attrs_string)
        if strict and remainder:
            # Nothing is assigned; the caller sees the non-empty remainder.
            return remainder

        for k, v in attrs:
            if k == '.':
                # add to class
                cls = elem.get('class')
                elem.set('class', f'{cls} {v}' if cls else v)
            else:
                # assign attribute `k` with `v`
                elem.set(self.sanitize_name(k), v)
        # The text that we initially over-matched will be put back.
        return remainder

    def sanitize_name(self, name: str) -> str:
        """
        Sanitize name as 'an XML Name, minus the `:`.'
        See <https://www.w3.org/TR/REC-xml-names/#NT-NCName>.

        Each run of characters matched by `NAME_RE` is replaced by a single `_`.
        """
        return self.NAME_RE.sub('_', name)
class AttrListExtension(Extension):
    """ Attribute List extension for Python-Markdown """

    def extendMarkdown(self, md):
        """ Register the `attr_list` tree processor (priority 8) and this extension with `md`. """
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        md.registerExtension(self)
def makeExtension(**kwargs):  # pragma: no cover
    """ Return an `AttrListExtension` built from the given keyword arguments. """
    extension = AttrListExtension(**kwargs)
    return extension