Spaces:
Running
Running
| """ | |
| 代码高亮功能模块 - 处理 Python 代码的语法高亮 | |
| """ | |
| import html | |
| import re | |
| from constants import KEYWORD_RE, BUILTIN_RE, NUMBER_RE | |
def split_comment(line: str) -> tuple[str, str]:
    """Split one line of Python source into its code and trailing comment.

    The line is scanned left to right while tracking whether the scanner is
    inside a single- or double-quoted string literal, so that a ``#`` inside
    a string is not mistaken for the start of a comment.

    Args:
        line: A single line of source text.

    Returns:
        A ``(code, comment)`` pair. ``comment`` starts at the first ``#``
        found outside a string literal and runs to the end of the line.
        When there is no such ``#``, ``code`` is the whole line and
        ``comment`` is ``""``.
    """
    open_quote = ""  # quote character of the string being scanned, "" if none
    escaped = False
    for index, ch in enumerate(line):
        if open_quote:
            if escaped:
                # The character after a backslash never closes the string.
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == open_quote:
                open_quote = ""
        elif ch in ("'", '"'):
            open_quote = ch
        elif ch == "#":
            return line[:index], line[index:]
        # A backslash outside a string is deliberately NOT an escape: escape
        # sequences exist only inside string literals, and tokenize_strings
        # applies the same rule, so both scanners agree on string boundaries.
    return line, ""
def tokenize_strings(code: str) -> list[tuple[str, str]]:
    """Break code into string-literal segments and plain-text segments.

    Args:
        code: Source text to split (comments are not treated specially here).

    Returns:
        A list of ``(kind, text)`` pairs in source order, where ``kind`` is
        ``"string"`` for a quoted literal (including its quote characters)
        and ``"text"`` for everything between literals. Concatenating every
        ``text`` reproduces ``code``. A literal with no closing quote extends
        to the end of ``code``.
    """
    quote_chars = ("'", '"')
    length = len(code)
    pieces: list[tuple[str, str]] = []
    pos = 0
    while pos < length:
        begin = pos
        opener = code[pos]

        if opener not in quote_chars:
            # Plain text: extend up to the next quote character or the end.
            pos += 1
            while pos < length and code[pos] not in quote_chars:
                pos += 1
            pieces.append(("text", code[begin:pos]))
            continue

        # String literal: scan for the matching closing quote.
        pos += 1
        while pos < length:
            current = code[pos]
            if current == "\\":
                # Skip the backslash and the character it escapes, so an
                # escaped quote does not terminate the literal.
                pos = min(pos + 2, length)
                continue
            pos += 1
            if current == opener:
                break
        pieces.append(("string", code[begin:pos]))
    return pieces
def highlight_text_segment(text: str) -> str:
    """Mark up keywords, builtins and numbers in a non-string code fragment.

    The fragment is HTML-escaped first. ``KEYWORD_RE``, ``BUILTIN_RE`` and
    ``NUMBER_RE`` are then applied in that order, each wrapping its first
    capture group in a ``<span>`` with the matching ``tok-*`` class.

    Args:
        text: A code fragment that contains no string literal.

    Returns:
        The escaped fragment with highlight spans inserted.
    """
    # NOTE(review): each later pattern also scans the span markup inserted by
    # the earlier ones; this relies on the patterns in `constants` not
    # matching that markup - confirm against their definitions.
    substitutions = (
        (KEYWORD_RE, r'<span class="tok-keyword">\1</span>'),
        (BUILTIN_RE, r'<span class="tok-builtin">\1</span>'),
        (NUMBER_RE, r'<span class="tok-number">\1</span>'),
    )
    markup = html.escape(text)
    for pattern, replacement in substitutions:
        markup = pattern.sub(replacement, markup)
    return markup
def highlight_python_line(line: str) -> str:
    """Render a single line of Python source as highlighted HTML.

    The trailing comment is split off first, the remaining code is divided
    into string literals and plain text, and each part is wrapped or
    highlighted accordingly.

    Args:
        line: One line of source text.

    Returns:
        An HTML fragment: string literals in ``tok-string`` spans, other code
        processed by ``highlight_text_segment``, and the comment (if any) in
        a ``tok-comment`` span at the end.
    """
    code_part, comment_part = split_comment(line)
    pieces = [
        '<span class="tok-string">{}</span>'.format(html.escape(fragment))
        if kind == "string"
        else highlight_text_segment(fragment)
        for kind, fragment in tokenize_strings(code_part)
    ]
    if comment_part:
        pieces.append(
            '<span class="tok-comment">{}</span>'.format(html.escape(comment_part))
        )
    return "".join(pieces)
def build_plain_code_html(code: str, block_id: str) -> str:
    """Build an HTML block showing ``code`` with line numbers and highlighting.

    Args:
        code: Source text; it is split with ``str.splitlines`` and each line
            is highlighted independently.
        block_id: Value for the ``id`` attribute of the wrapping ``<div>``.

    Returns:
        Newline-joined HTML: an opening ``<div class="rosa-code">``, one
        ``<div class="code-line">`` per source line (numbered from 1), and
        the closing ``</div>``. Empty ``code`` yields only the wrapper.
    """
    # The id lands inside a double-quoted attribute, so escape it like all
    # other emitted text; a quote or "<" in it would otherwise break out of
    # the attribute. Ordinary ids are unchanged by escaping.
    safe_id = html.escape(block_id, quote=True)
    rendered = [f'<div id="{safe_id}" class="rosa-code">']
    for number, line in enumerate(code.splitlines(), start=1):
        rendered.append(
            '<div class="code-line">'
            f'<span class="line-no">{number}</span>'
            f'<span class="line-text">{highlight_python_line(line)}</span>'
            "</div>"
        )
    rendered.append("</div>")
    return "\n".join(rendered)