import markdown import re import os import math from textwrap import dedent from functools import lru_cache from pymdownx.superfences import fence_div_format, fence_code_format from latex2mathml.converter import convert as tex2mathml from shared_utils.config_loader import get_conf as get_conf pj = os.path.join default_user_name = 'default_user' markdown_extension_configs = { 'mdx_math': { 'enable_dollar_delimiter': True, 'use_gitlab_delimiters': False, }, } code_highlight_configs = { "pymdownx.superfences": { 'css_class': 'codehilite', "custom_fences": [ { 'name': 'mermaid', 'class': 'mermaid', 'format': fence_code_format } ] }, "pymdownx.highlight": { 'css_class': 'codehilite', 'guess_lang': True, # 'auto_title': True, # 'linenums': True } } def text_divide_paragraph(text): """ 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。 """ pre = '
' suf = '
' if text.startswith(pre) and text.endswith(suf): return text if '```' in text: # careful input return text elif '' in text: # careful input return text else: # whatever input lines = text.split("\n") for i, line in enumerate(lines): lines[i] = lines[i].replace(" ", " ") text = "
".join(lines) return pre + text + suf def tex2mathml_catch_exception(content, *args, **kwargs): try: content = tex2mathml(content, *args, **kwargs) except: content = content return content def replace_math_no_render(match): content = match.group(1) if 'mode=display' in match.group(0): content = content.replace('\n', '
') return f"$${content}$$" else: return f"${content}$" def replace_math_render(match): content = match.group(1) if 'mode=display' in match.group(0): if '\\begin{aligned}' in content: content = content.replace('\\begin{aligned}', '\\begin{array}') content = content.replace('\\end{aligned}', '\\end{array}') content = content.replace('&', ' ') content = tex2mathml_catch_exception(content, display="block") return content else: return tex2mathml_catch_exception(content) def markdown_bug_hunt(content): """ 解决一个mdx_math的bug(单$包裹begin命令时多余\n', '') return content def is_equation(txt): """ 判定是否为公式 | 测试1 写出洛伦兹定律,使用tex格式公式 测试2 给出柯西不等式,使用latex格式 测试3 写出麦克斯韦方程组 """ if '```' in txt and '```reference' not in txt: return False if '$' not in txt and '\\[' not in txt: return False mathpatterns = { r'(?^[ \t]*(?:~{3,}|`{3,}))[ ]* # opening fence ((\{(?P[^\}\n]*)\})| # (optional {attrs} or (\.?(?P[\w#.+-]*)[ ]*)? # optional (.)lang (hl_lines=(?P"|')(?P.*?)(?P=quot)[ ]*)?) # optional hl_lines) \n # newline (end of opening fence) (?P.*?)(?<=\n) # the code block (?P=fence)[ ]*$ # closing fence '''), re.MULTILINE | re.DOTALL | re.VERBOSE ) def get_line_range(re_match_obj, txt): start_pos, end_pos = re_match_obj.regs[0] num_newlines_before = txt[:start_pos+1].count('\n') line_start = num_newlines_before line_end = num_newlines_before + txt[start_pos:end_pos].count('\n')+1 return line_start, line_end def fix_code_segment_indent(txt): lines = [] change_any = False txt_tmp = txt while True: re_match_obj = FENCED_BLOCK_RE.search(txt_tmp) if not re_match_obj: break if len(lines) == 0: lines = txt.split("\n") # 清空 txt_tmp 对应的位置方便下次搜索 start_pos, end_pos = re_match_obj.regs[0] txt_tmp = txt_tmp[:start_pos] + ' '*(end_pos-start_pos) + txt_tmp[end_pos:] line_start, line_end = get_line_range(re_match_obj, txt) # 获取公共缩进 shared_indent_cnt = 1e5 for i in range(line_start, line_end): stripped_string = lines[i].lstrip() num_spaces = len(lines[i]) - len(stripped_string) if num_spaces < shared_indent_cnt: shared_indent_cnt = num_spaces # 修复缩进 if (shared_indent_cnt < 1e5) and (shared_indent_cnt % 4) == 3: num_spaces_should_be = math.ceil(shared_indent_cnt / 4) * 4 for i in range(line_start, line_end): add_n = num_spaces_should_be - shared_indent_cnt lines[i] = ' ' * add_n + lines[i] if not change_any: # 遇到第一个 change_any = True if change_any: return '\n'.join(lines) else: return txt @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 def markdown_convertion(txt): """ 将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。 """ pre = '
' suf = '
' if txt.startswith(pre) and txt.endswith(suf): # print('警告,输入了已经经过转化的字符串,二次转化可能出问题') return txt # 已经被转化过,不需要再次转化 find_equation_pattern = r'