modelscope-studio / components /parse_markdown.py
Coloring's picture
feat: add Flow component
176823e
from html.parser import HTMLParser
def default_read_file(path):
    """Return the full text content of the file at *path*.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the locale's default encoding of the machine running the parser.

    Args:
        path: Filesystem path of the file to read.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
# Custom tags interpreted by MarkdownParser; any other tag is passed through
# to the output as literal text.
enable_tags = ["demo", "demo-prefix", "demo-suffix", "file"]


class MarkdownParser(HTMLParser):
    """Split markdown into a list of text and demo segments.

    The parsed result (see ``get_value``) is a list of dicts:

    * ``{"type": "text", "value": str}`` for ordinary content, including
      the raw text of any tag that is not listed in ``enable_tags``.
    * ``{"type": "demo", "code_position": ..., "name": ..., "prefix": str,
      "suffix": str}`` for each ``<demo>`` tag.

    ``<demo-prefix>`` / ``<demo-suffix>`` content is accumulated on the most
    recent demo segment, and ``<file src="...">`` is replaced by whatever
    ``read_file(src)`` returns.
    """

    def __init__(self, read_file=None):
        """Create a parser.

        Args:
            read_file: Callable taking the ``src`` attribute of a ``<file>``
                tag and returning its text. Falls back to
                ``default_read_file`` when omitted or falsy.
        """
        super().__init__()
        # The result always starts with a (possibly empty) text segment.
        self.value = [{"type": "text", "value": ""}]
        # Stack of currently open custom tags; current_tag mirrors its top.
        self.tag_stack = []
        self.read_file = read_file or default_read_file
        self.current_tag = None

    def get_value(self):
        """Return the list of segments parsed so far."""
        return self.value

    def handle_data(self, data: str) -> None:
        """Append *data* to the segment selected by the current context."""
        last = self.value[-1]
        if last["type"] == "text":
            last["value"] += data
        elif self.current_tag is None:
            # Last segment is a demo that has been closed: open a new text
            # segment for what follows it.
            self.value.append({"type": "text", "value": data})
        elif self.current_tag == "demo-prefix":
            last["prefix"] += data
        elif self.current_tag == "demo-suffix":
            last["suffix"] += data
        # Any other context (e.g. directly inside <demo>) discards the data.

    def handle_startendtag(self, tag: str, attrs) -> None:
        """Handle a self-closing tag such as ``<br/>`` or ``<file ... />``."""
        if tag not in enable_tags:
            self.handle_data(self.get_starttag_text())
            return
        # A self-closing custom tag behaves like an immediately closed one;
        # without this dispatch it would be dropped silently.
        self.handle_starttag(tag, attrs)
        self.handle_endtag(tag)

    def handle_starttag(self, tag: str, attrs) -> None:
        """Handle an opening tag.

        Raises:
            KeyError: If ``<demo>`` has no ``name`` attribute or ``<file>``
                has no ``src`` attribute.
        """
        if tag not in enable_tags:
            # Unknown tags are kept verbatim as part of the text.
            self.handle_data(self.get_starttag_text())
            return
        attributes = dict(attrs)
        if tag == "demo":
            self.value.append({
                "type": "demo",
                "code_position": attributes.get("code-position", "left"),
                "name": attributes["name"],
                "prefix": "",
                "suffix": "",
            })
        elif tag == "file":
            # Route the file content like inline text so that it also opens
            # a new text segment when it directly follows a closed demo.
            # current_tag still refers to the enclosing tag at this point.
            self.handle_data(self.read_file(attributes["src"]))
        self.current_tag = tag
        self.tag_stack.append(tag)

    def handle_endtag(self, tag: str) -> None:
        """Handle a closing tag and restore the enclosing tag context."""
        if tag not in enable_tags:
            self.handle_data(f"</{tag}>")
            return
        if self.tag_stack:
            self.tag_stack.pop()
        self.current_tag = self.tag_stack[-1] if self.tag_stack else None
def parse_markdown(markdown: str, read_file=None):
    """Parse *markdown* and return its list of text/demo segments.

    Args:
        markdown: The markdown source, possibly containing the custom tags
            handled by ``MarkdownParser``.
        read_file: Optional callable passed to ``MarkdownParser`` to resolve
            ``<file src="...">`` tags.

    Returns:
        The value of ``MarkdownParser.get_value()`` after the whole input
        has been processed.
    """
    parser = MarkdownParser(read_file=read_file)
    parser.feed(markdown)
    # feed() may hold back trailing input it considers incomplete (for
    # example text ending in "&word" or an unterminated "<tag"); close()
    # forces that buffered data through the handlers so it is not lost.
    parser.close()
    return parser.get_value()