.*?)(?<=\n) # the code block
(?P=fence)[ ]*$ # closing fence
"""
),
re.MULTILINE | re.DOTALL | re.VERBOSE,
)
def get_line_range(re_match_obj, txt):
    """Translate a regex match into a half-open range of line indices of ``txt``.

    Args:
        re_match_obj: Match object whose overall span (``regs[0]``) is used.
        txt: The text the character offsets of the match refer to.

    Returns:
        A ``(line_start, line_end)`` tuple. ``line_start`` is the number of
        newline characters in ``txt`` up to and including the first matched
        character; ``line_end`` is ``line_start`` plus the number of newlines
        inside the matched span, plus one.
    """
    begin, end = re_match_obj.regs[0]
    # Counting through `begin + 1` deliberately includes the first matched character.
    first_line = txt.count("\n", 0, begin + 1)
    newlines_in_match = txt.count("\n", begin, end)
    return first_line, first_line + newlines_in_match + 1
def fix_code_segment_indent(txt):
    """Pad fenced code blocks whose shared indent is one short of a multiple of four.

    Every match of ``FENCED_BLOCK_RE`` in ``txt`` is examined in turn. The
    smallest leading-whitespace width among the lines covered by the match is
    taken as the block's shared indent; when that width is 3 modulo 4, each of
    those lines is prefixed with enough spaces to reach the next multiple of 4.

    Args:
        txt: The text to scan.

    Returns:
        The re-joined text if at least one block was padded, otherwise ``txt``
        itself, unchanged.
    """
    rows = None  # split lazily: only needed once a fenced block has been found
    scratch = txt  # search copy; matched spans are blanked so they are not found twice
    modified = False

    while True:
        found = FENCED_BLOCK_RE.search(scratch)
        if not found:
            break
        if rows is None:
            rows = txt.split("\n")

        # Blank out the matched span in the search copy so the next search moves on.
        begin, end = found.regs[0]
        scratch = "".join((scratch[:begin], " " * (end - begin), scratch[end:]))

        # Line indices come from the untouched text; padding never changes line counts.
        first_row, stop_row = get_line_range(found, txt)
        block_rows = range(first_row, stop_row)

        # Shared indent = smallest leading-whitespace width over the block's lines.
        common_indent = min(
            (len(rows[r]) - len(rows[r].lstrip()) for r in block_rows),
            default=1e5,
        )
        if common_indent >= 1e5 or common_indent % 4 != 3:
            continue

        # Repair the indent: round up to the next multiple of four.
        target_indent = math.ceil(common_indent / 4) * 4
        pad = " " * (target_indent - common_indent)
        for r in block_rows:
            rows[r] = pad + rows[r]
        modified = True

    return "\n".join(rows) if modified else txt
def fix_dollar_sticking_bug(txt):
    """Repair non-standard use of ``$`` formula delimiters in ``txt``.

    The text is consumed left to right, one ``$`` / ``$$`` delimiter at a
    time, while tracking whether an inline (``$``) or a display (``$$``)
    formula is currently open:

    * Inline formula open, and a newline or `` | `` occurs in the text leading
      up to the next ``$``: a warning is printed, `` $`` is emitted to close
      the formula, and the same ``$`` is then re-read as a fresh delimiter.
    * Display formula open, and two consecutive newlines occur in the text
      leading up to the next ``$``: a warning is printed, ``$$`` is emitted,
      and the same ``$`` is re-read.
    * ``$$`` met while an inline formula is open: a space is inserted between
      the two dollars so that the first one closes the inline formula.

    Args:
        txt: Text that may contain ``$``-delimited formulas.

    Returns:
        The text with the repairs above applied. Text without any ``$`` is
        returned unchanged.
    """
    pieces = []          # output fragments, joined once at the end
    single_open = False  # an inline `$ ... ` formula is currently open
    double_open = False  # a display `$$ ... ` formula is currently open

    while True:
        index = txt.find('$')
        if index == -1:
            pieces.append(txt)
            return "".join(pieces)

        upto_dollar = txt[:index + 1]

        # `find(...) > 0` ignores a marker sitting at the very start of the segment.
        # NOTE(review): the original also tested `find('') > 0`, which can never
        # be true (str.find('') returns 0), so it is omitted here. If it was
        # meant to look for a specific marker string, restore that marker.
        if single_open and (upto_dollar.find('\n') > 0 or upto_dollar.find(' | ') > 0):
            print('公式之中出现了异常 (Unexpect element in equation)')
            single_open = False
            pieces.append(' $')
            continue  # re-read the same `$` with the inline formula now closed

        if double_open and upto_dollar.find('\n\n') > 0:
            print('公式之中出现了异常 (Unexpect element in equation)')
            double_open = False
            pieces.append('$$')
            continue  # re-read the same `$` with the display formula now closed

        # startswith() is safe when `$` is the last character; indexing
        # txt[index + 1] raised IndexError in that case.
        if txt.startswith('$$', index):
            if single_open:
                # Add a padding space between the two dollars.
                txt = txt[:index + 1] + " " + txt[index + 1:]
                continue
            double_open = not double_open
            consumed = index + 2
        else:
            if double_open:
                print('发现异常嵌套公式')
            single_open = not single_open
            consumed = index + 1

        pieces.append(txt[:consumed])
        txt = txt[consumed:]
def markdown_convertion_for_file(txt):
"""
Convert Markdown-formatted text to HTML. If it contains math formulas, the formulas are converted to HTML first.

NOTE(review): several names used below (`suf`, `convert_stage_2`, `repl_fn`) are not assigned
anywhere in the visible body of this function, and `find_equation_pattern` is an empty
pattern — confirm against the intended implementation before relying on this function.
"""
from themes.theme import advanced_css
pre = f"""
GPT-Academic输出文档
"""
# Input that already starts with `pre` and ends with `suf` is returned untouched.
if txt.startswith(pre) and txt.endswith(suf):
# print('Warning: the input string has already been converted; converting it again may cause problems')
return txt # already converted, no need to convert again
find_equation_pattern = r''
# Join into one alternation the `mathpatterns` keys whose "allow_multi_lines" property is falsy.
pattern = "|".join([pattern for pattern, property in mathpatterns.items() if not property["allow_multi_lines"]])
pattern = re.compile(pattern, flags=re.ASCII)
convert_stage_3 = pattern.sub(repl_fn, convert_stage_2)
convert_stage_4 = markdown_bug_hunt(convert_stage_3)
# 2. convert to rendered equation
convert_stage_5, n = re.subn(
find_equation_pattern, replace_math_render, convert_stage_4, flags=re.DOTALL
)
# cat them together
return pre + convert_stage_5 + suf
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt):
"""
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
"""
pre = ''
suf = "
"
if txt.startswith(pre) and txt.endswith(suf):
# print('警告,输入了已经经过转化的字符串,二次转化可能出问题')
return txt # 已经被转化过,不需要再次转化
find_equation_pattern = r'