Spaces:
Sleeping
Sleeping
whisper-large-v3
/
venv
/lib
/python3.10
/site-packages
/pip
/_vendor
/pygments
/formatters
/latex.py
""" | |
pygments.formatters.latex | |
~~~~~~~~~~~~~~~~~~~~~~~~~ | |
Formatter for LaTeX fancyvrb output. | |
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS. | |
:license: BSD, see LICENSE for details. | |
""" | |
from io import StringIO | |
from pip._vendor.pygments.formatter import Formatter | |
from pip._vendor.pygments.lexer import Lexer, do_insertions | |
from pip._vendor.pygments.token import Token, STANDARD_TYPES | |
from pip._vendor.pygments.util import get_bool_opt, get_int_opt | |
__all__ = ['LatexFormatter'] | |
# (character, macro suffix) pairs.  Each special character is rendered as
# ``\<commandprefix>Z<suffix>{}``; the matching macros are defined in
# STYLE_TEMPLATE.
_TEX_ESCAPES = (
    ('\\', 'bs'),
    ('{', 'ob'),
    ('}', 'cb'),
    ('^', 'ca'),
    ('_', 'us'),
    ('&', 'am'),
    ('<', 'lt'),
    ('>', 'gt'),
    ('#', 'sh'),
    ('%', 'pc'),
    ('$', 'dl'),
    ('-', 'hy'),
    ("'", 'sq'),
    ('"', 'dq'),
    ('~', 'ti'),
)


def escape_tex(text, commandprefix):
    r"""
    Replace characters that are special to LaTeX with escape macros.

    Every character listed in ``_TEX_ESCAPES`` is replaced by
    ``\<commandprefix>Z<suffix>{}`` (for example ``_`` becomes ``\PYZus{}``
    when `commandprefix` is ``'PY'``).  All other characters are returned
    unchanged.

    The substitution is done in a single pass, so the backslashes and braces
    of the inserted macros are never escaped again, and control characters
    such as ``\x00`` in `text` are passed through untouched rather than being
    mistaken for internal placeholders.
    """
    table = {ord(char): r'\%sZ%s{}' % (commandprefix, suffix)
             for char, suffix in _TEX_ESCAPES}
    return text.translate(table)
# Skeleton of a complete LaTeX document.  It is filled in with the
# %-operator and a mapping providing the keys ``docclass``, ``encoding``,
# ``preamble``, ``styledefs``, ``title`` and ``code``.
DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s
%(styledefs)s
\begin{document}
\section*{%(title)s}
%(code)s
\end{document}
'''
## Small explanation of the mess below :) | |
# | |
# The previous version of the LaTeX formatter just assigned a command to | |
# each token type defined in the current style. That obviously is | |
# problematic if the highlighted code is produced for a different style | |
# than the style commands themselves. | |
# | |
# This version works much like the HTML formatter which assigns multiple | |
# CSS classes to each <span> tag, from the most specific to the least | |
# specific token type, thus falling back to the parent token type if one | |
# is not defined. Here, the classes are there too and use the same short | |
# forms given in token.STANDARD_TYPES. | |
# | |
# Highlighted code now only uses one custom command, which by default is | |
# \PY and selectable by the commandprefix option (and in addition the | |
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for | |
# backwards compatibility purposes). | |
# | |
# \PY has two arguments: the classes, separated by +, and the text to | |
# render in that style. The classes are resolved into the respective | |
# style commands by magic, which serves to ignore unknown classes. | |
# | |
# The magic macros are: | |
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text | |
# to render in \PY@do. Their definition determines the style. | |
# * \PY@reset resets \PY@it etc. to do nothing. | |
# * \PY@toks parses the list of classes, using magic inspired by the | |
# keyval package (but modified to use plusses instead of commas | |
# because fancyvrb redefines commas inside its environments). | |
# * \PY@tok processes one class, calling the \PY@tok@classname command | |
# if it exists. | |
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style | |
# for its class. | |
# * \PY resets the style, parses the classnames and then calls \PY@do. | |
# | |
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print(STYLE_TEMPLATE % {'cp': 'PY', 'styles': ''})
# LaTeX macro definitions needed by the highlighted output.  Filled in with
# the %-operator and a mapping providing ``cp`` (the command prefix) and
# ``styles`` (the per-token-type style definitions).  Literal percent signs
# are doubled because of the %-formatting.
STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}
%(styles)s
\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''
def _get_ttype_name(ttype):
    """
    Return the short class name used for `ttype` in the LaTeX output.

    If `ttype` has a non-empty entry in ``STANDARD_TYPES`` that entry is
    returned.  Otherwise the token type's ancestors are walked until one
    with an entry is found, and the names of the skipped levels are appended
    to that ancestor's short name.
    """
    short = STANDARD_TYPES.get(ttype)
    if short:
        return short
    # Collect the last path component of every level that has no entry,
    # innermost first.
    skipped = []
    while short is None:
        skipped.append(ttype[-1])
        ttype = ttype.parent
        short = STANDARD_TYPES.get(ttype)
    return short + ''.join(reversed(skipped))
class LatexFormatter(Formatter):
    r"""
    Format tokens as LaTeX code. This needs the `fancyvrb` and `color`
    standard packages.

    Without the `full` option, code is formatted as one ``Verbatim``
    environment, like this:

    .. sourcecode:: latex

        \begin{Verbatim}[commandchars=\\\{\}]
        \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}):
            \PY{k}{pass}
        \end{Verbatim}

    Wrapping can be disabled using the `nowrap` option.

    The special command used here (``\PY``) and all the other macros it needs
    are output by the `get_style_defs` method.

    With the `full` option, a complete LaTeX document is output, including
    the command definitions in the preamble.

    The `get_style_defs()` method of a `LatexFormatter` returns a string
    containing ``\def`` commands defining the macros needed inside the
    ``Verbatim`` environments.

    Additional options accepted:

    `nowrap`
        If set to ``True``, don't wrap the tokens at all, not even inside a
        ``\begin{Verbatim}`` environment. This disables most other options
        (default: ``False``).

    `style`
        The style to use, can be a string or a Style subclass (default:
        ``'default'``).

    `full`
        Tells the formatter to output a "full" document, i.e. a complete
        self-contained document (default: ``False``).

    `title`
        If `full` is true, the title that should be used to caption the
        document (default: ``''``).

    `docclass`
        If the `full` option is enabled, this is the document class to use
        (default: ``'article'``).

    `preamble`
        If the `full` option is enabled, this can be further preamble commands,
        e.g. ``\usepackage`` (default: ``''``).

    `linenos`
        If set to ``True``, output line numbers (default: ``False``).

    `linenostart`
        The line number for the first line (default: ``1``).

    `linenostep`
        If set to a number n > 1, only every nth line number is printed.

    `verboptions`
        Additional options given to the Verbatim environment (see the *fancyvrb*
        docs for possible values) (default: ``''``).

    `commandprefix`
        The LaTeX commands used to produce colored output are constructed
        using this prefix and some letters (default: ``'PY'``).

        .. versionadded:: 0.7
        .. versionchanged:: 0.10
           The default is now ``'PY'`` instead of ``'C'``.

    `texcomments`
        If set to ``True``, enables LaTeX comment lines.  That is, LaTeX markup
        in comment tokens is not escaped so that LaTeX can render it (default:
        ``False``).

        .. versionadded:: 1.2

    `mathescape`
        If set to ``True``, enables LaTeX math mode escape in comments. That
        is, ``'$...$'`` inside a comment will trigger math mode (default:
        ``False``).

        .. versionadded:: 1.2

    `escapeinside`
        If set to a string of length 2, enables escaping to LaTeX. Text
        delimited by these 2 characters is read as LaTeX code and
        typeset accordingly. It has no effect in string literals. It has
        no effect in comments if `texcomments` or `mathescape` is
        set. (default: ``''``).

        .. versionadded:: 2.0

    `envname`
        Allows you to pick an alternative environment name replacing Verbatim.
        The alternate environment still has to support Verbatim's option syntax.
        (default: ``'Verbatim'``).

        .. versionadded:: 2.0
    """
    name = 'LaTeX'
    aliases = ['latex', 'tex']
    filenames = ['*.tex']

    def __init__(self, **options):
        """
        Read the formatter options and build the style command table.

        ``self.style``, ``self.full``, ``self.title`` and ``self.encoding``
        are used by other methods but are not assigned here; they are
        expected to be provided by the ``Formatter`` base class.
        """
        Formatter.__init__(self, **options)
        self.nowrap = get_bool_opt(options, 'nowrap', False)
        self.docclass = options.get('docclass', 'article')
        self.preamble = options.get('preamble', '')
        self.linenos = get_bool_opt(options, 'linenos', False)
        # Negative numbers are folded to their absolute value.
        self.linenostart = abs(get_int_opt(options, 'linenostart', 1))
        self.linenostep = abs(get_int_opt(options, 'linenostep', 1))
        self.verboptions = options.get('verboptions', '')
        # Parsed for callers that pass it, but not consulted anywhere in
        # this class.
        self.nobackground = get_bool_opt(options, 'nobackground', False)
        self.commandprefix = options.get('commandprefix', 'PY')
        self.texcomments = get_bool_opt(options, 'texcomments', False)
        self.mathescape = get_bool_opt(options, 'mathescape', False)
        self.escapeinside = options.get('escapeinside', '')
        if len(self.escapeinside) == 2:
            self.left = self.escapeinside[0]
            self.right = self.escapeinside[1]
        else:
            # Anything that is not exactly two characters disables escaping.
            self.escapeinside = ''
        self.envname = options.get('envname', 'Verbatim')

        self._create_stylesheet()

    def _create_stylesheet(self):
        """
        Build the lookup tables derived from the current style.

        ``self.ttype2name`` maps each styled token type to its short class
        name and ``self.cmd2def`` maps that class name to the LaTeX code
        that sets up its style.  Token types without any styling get no
        entry in either table.
        """
        t2n = self.ttype2name = {Token: ''}
        c2d = self.cmd2def = {}
        cp = self.commandprefix

        def rgbcolor(col):
            # Convert an 'rrggbb' hex string to the 'r,g,b' form (each
            # component in 0..1) expected by the LaTeX color commands.
            # A missing color falls back to white.
            if col:
                return ','.join(['%.2f' % (int(col[i] + col[i + 1], 16) / 255.0)
                                 for i in (0, 2, 4)])
            else:
                return '1,1,1'

        # '$$' is a placeholder for the command prefix; it is substituted
        # once per token type below.
        for ttype, ndef in self.style:
            name = _get_ttype_name(ttype)
            cmndef = ''
            if ndef['bold']:
                cmndef += r'\let\$$@bf=\textbf'
            if ndef['italic']:
                cmndef += r'\let\$$@it=\textit'
            if ndef['underline']:
                cmndef += r'\let\$$@ul=\underline'
            if ndef['roman']:
                cmndef += r'\let\$$@ff=\textrm'
            if ndef['sans']:
                cmndef += r'\let\$$@ff=\textsf'
            if ndef['mono']:
                # Monospace is \texttt; \textsf would select the sans serif
                # family and duplicate the 'sans' branch above.
                cmndef += r'\let\$$@ff=\texttt'
            if ndef['color']:
                cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' %
                           rgbcolor(ndef['color']))
            if ndef['border']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{\string -\fboxrule}'
                           r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}}' %
                           (rgbcolor(ndef['border']),
                            rgbcolor(ndef['bgcolor'])))
            elif ndef['bgcolor']:
                cmndef += (r'\def\$$@bc##1{{\setlength{\fboxsep}{0pt}'
                           r'\colorbox[rgb]{%s}{\strut ##1}}}' %
                           rgbcolor(ndef['bgcolor']))
            if cmndef == '':
                continue
            cmndef = cmndef.replace('$$', cp)
            t2n[ttype] = name
            c2d[name] = cmndef

    def get_style_defs(self, arg=''):
        """
        Return the command sequences needed to define the commands
        used to format text in the verbatim environment. ``arg`` is ignored.
        """
        cp = self.commandprefix
        styles = [r'\@namedef{%s@tok@%s}{%s}' % (cp, name, definition)
                  for name, definition in self.cmd2def.items()]
        return STYLE_TEMPLATE % {'cp': cp,
                                 'styles': '\n'.join(styles)}

    def format_unencoded(self, tokensource, outfile):
        """
        Write the ``(tokentype, value)`` pairs of `tokensource` to `outfile`
        as LaTeX.

        Unless `nowrap` is set the output is wrapped in the configured
        verbatim environment.  With `full` set, the result is additionally
        embedded in ``DOC_TEMPLATE`` before being written.
        """
        # TODO: add support for background colors
        t2n = self.ttype2name
        cp = self.commandprefix

        if self.full:
            # Collect the highlighted code in a buffer first; it is inserted
            # into the document template at the end.
            realoutfile = outfile
            outfile = StringIO()

        if not self.nowrap:
            outfile.write('\\begin{' + self.envname + '}[commandchars=\\\\\\{\\}')
            if self.linenos:
                first, step = self.linenostart, self.linenostep
                # A value of 0 omits the corresponding option.
                outfile.write(',numbers=left' +
                              (',firstnumber=%d' % first if first else '') +
                              (',stepnumber=%d' % step if step else ''))
            if self.mathescape or self.texcomments or self.escapeinside:
                # Restore the usual category codes of $, ^ and _ so that
                # unescaped LaTeX (math mode in particular) works.
                outfile.write(',codes={\\catcode`\\$=3\\catcode`\\^=7'
                              '\\catcode`\\_=8\\relax}')
            if self.verboptions:
                outfile.write(',' + self.verboptions)
            outfile.write(']\n')

        for ttype, value in tokensource:
            if ttype in Token.Comment:
                if self.texcomments:
                    # Try to guess comment starting lexeme and escape it ...
                    lead = value[0:1]
                    for char in value[1:]:
                        if char != lead[0]:
                            break
                        lead += char

                    value = value[len(lead):]
                    lead = escape_tex(lead, cp)

                    # ... but do not escape inside comment.
                    value = lead + value
                elif self.mathescape:
                    # Only escape parts not inside a math environment.
                    parts = value.split('$')
                    in_math = False
                    for i, part in enumerate(parts):
                        if not in_math:
                            parts[i] = escape_tex(part, cp)
                        in_math = not in_math
                    value = '$'.join(parts)
                elif self.escapeinside:
                    # Text between the two delimiters is passed through
                    # unescaped (without the delimiters); an opening
                    # delimiter without a closing one is escaped like
                    # ordinary text.
                    text = value
                    value = ''
                    while text:
                        a, sep1, text = text.partition(self.left)
                        if sep1:
                            b, sep2, text = text.partition(self.right)
                            if sep2:
                                value += escape_tex(a, cp) + b
                            else:
                                value += escape_tex(a + sep1 + b, cp)
                        else:
                            value += escape_tex(a, cp)
                else:
                    value = escape_tex(value, cp)
            elif ttype not in Token.Escape:
                # Token.Escape values are raw LaTeX and must stay untouched.
                value = escape_tex(value, cp)

            # Class names from the most specific token type up to (but not
            # including) the root Token type.
            styles = []
            while ttype is not Token:
                try:
                    styles.append(t2n[ttype])
                except KeyError:
                    # not in current style
                    styles.append(_get_ttype_name(ttype))
                ttype = ttype.parent
            styleval = '+'.join(reversed(styles))
            if styleval:
                # Emit one style command per line so that no command
                # argument spans a line break.
                spl = value.split('\n')
                for line in spl[:-1]:
                    if line:
                        outfile.write("\\%s{%s}{%s}" % (cp, styleval, line))
                    outfile.write('\n')
                if spl[-1]:
                    outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1]))
            else:
                outfile.write(value)

        if not self.nowrap:
            outfile.write('\\end{' + self.envname + '}\n')

        if self.full:
            encoding = self.encoding or 'utf8'
            # map known existing encodings from LaTeX distribution
            encoding = {
                'utf_8': 'utf8',
                'latin_1': 'latin1',
                'iso_8859_1': 'latin1',
            }.get(encoding.replace('-', '_'), encoding)
            realoutfile.write(DOC_TEMPLATE %
                              dict(docclass=self.docclass,
                                   preamble=self.preamble,
                                   title=self.title,
                                   encoding=encoding,
                                   styledefs=self.get_style_defs(),
                                   code=outfile.getvalue()))
class LatexEmbeddedLexer(Lexer):
    """
    Lexer wrapper that recognises escaped LaTeX segments in source text.

    It is constructed with the left and right escape delimiters and the
    lexer of the language being formatted.  The text is first scanned with
    the language lexer to locate strings and comments.  All other
    consecutive tokens are merged and searched for escaped segments, which
    get the ``Token.Escape`` type.  Finally the text that is not escaped is
    scanned again with the language lexer.
    """

    def __init__(self, left, right, lang, **options):
        """Store the escape delimiters and the wrapped language lexer."""
        self.left = left
        self.right = right
        self.lang = lang
        Lexer.__init__(self, **options)

    def get_tokens_unprocessed(self, text):
        """
        Lex `text` with the wrapped lexer, with escape tokens spliced in.

        The escape tokens are taken out of the text (as if replaced by an
        empty string), the remainder is lexed by the language lexer and the
        removed tokens are re-inserted at their positions.  This is very
        similar to DelegatingLexer.get_tokens_unprocessed.
        """
        plain_text = ''
        insertions = []
        pending = []
        for pos, tok, val in self._find_safe_escape_tokens(text):
            if tok is not None:
                pending.append((pos, tok, val))
                continue
            # Plain text: flush the tokens collected so far at the current
            # offset in the plain-text buffer.
            if pending:
                insertions.append((len(plain_text), pending))
                pending = []
            plain_text += val
        if pending:
            insertions.append((len(plain_text), pending))
        lang_tokens = self.lang.get_tokens_unprocessed(plain_text)
        return do_insertions(insertions, lang_tokens)

    def _find_safe_escape_tokens(self, text):
        """ find escape tokens that are not in strings or comments """

        def is_string_or_comment(tok):
            return tok in Token.Comment or tok in Token.String

        lang_tokens = self.lang.get_tokens_unprocessed(text)
        for pos, tok, val in self._filter_to(lang_tokens, is_string_or_comment):
            if tok is not None:
                # Strings and comments are handed on as plain text.
                yield pos, None, val
                continue
            for offset, inner_tok, inner_val in self._find_escape_tokens(val):
                yield pos + offset, inner_tok, inner_val

    def _filter_to(self, it, pred):
        """ Keep only the tokens that match `pred`, merge the others together """
        merged = ''
        merged_start = 0
        for pos, tok, val in it:
            if not pred(tok):
                if not merged:
                    merged_start = pos
                merged += val
                continue
            if merged:
                yield merged_start, None, merged
                merged = ''
            yield pos, tok, val
        if merged:
            yield merged_start, None, merged

    def _find_escape_tokens(self, text):
        """ Find escape tokens within text, give token=None otherwise """
        pos = 0
        remaining = text
        while remaining:
            before, opener, remaining = remaining.partition(self.left)
            if before:
                yield pos, None, before
                pos += len(before)
            if not opener:
                continue
            escaped, closer, after = remaining.partition(self.right)
            if closer:
                yield pos + len(opener), Token.Escape, escaped
                pos += len(opener) + len(escaped) + len(closer)
                remaining = after
            else:
                # Unterminated escape: report the opening delimiter as an
                # error and keep scanning the text that follows it.
                yield pos, Token.Error, opener
                pos += len(opener)
                remaining = escaped