getapi committed on
Commit
0422eb5
1 Parent(s): 5cf78a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -101,13 +101,22 @@ def md_to_dom(markdown_text: str) -> list[dict[str, str | list | dict | None]]:
101
  return {'tag': element.tag, 'attrs': {'src': element.get('src')}}
102
 
103
  def parse_children(element) -> list[str | dict[str, str | list | dict | None]]:
104
- return [parse_element(child) if child.tag else child.strip() for child in element.iterchildren() if child.tag or (isinstance(child, str) and child.strip())]
 
 
 
 
 
 
 
 
 
 
105
 
106
  def parse_element(element) -> dict[str, str | list | dict | None]:
107
  handlers = {'h1': handle_heading, 'h2': handle_heading, 'h3': handle_heading, 'h4': handle_heading, 'h5': handle_heading, 'h6': handle_heading, 'ul': handle_list, 'ol': handle_list, 'a': handle_link, 'img': handle_media, 'iframe': handle_media}
108
  handler = handlers.get(element.tag, lambda e: {'tag': e.tag, 'children': parse_children(e)})
109
  return handler(element)
110
-
111
  html_content = markdown(markdown_text, extensions=['extra', 'sane_lists'])
112
  tree = html.fromstring(html_content)
113
  return [parse_element(element) for element in tree.body]
 
101
  return {'tag': element.tag, 'attrs': {'src': element.get('src')}}
102
 
103
  def parse_children(element) -> list[str | dict[str, str | list | dict | None]]:
104
+ children = []
105
+ for child in element.iterchildren():
106
+ if child.tag:
107
+ children.append(parse_element(child))
108
+ elif isinstance(child, str):
109
+ children.append(child.strip())
110
+ if element.text and element.text.strip():
111
+ children.insert(0, element.text.strip())
112
+ if element.tail and element.tail.strip():
113
+ children.append(element.tail.strip())
114
+ return children
115
 
116
  def parse_element(element) -> dict[str, str | list | dict | None]:
117
  handlers = {'h1': handle_heading, 'h2': handle_heading, 'h3': handle_heading, 'h4': handle_heading, 'h5': handle_heading, 'h6': handle_heading, 'ul': handle_list, 'ol': handle_list, 'a': handle_link, 'img': handle_media, 'iframe': handle_media}
118
  handler = handlers.get(element.tag, lambda e: {'tag': e.tag, 'children': parse_children(e)})
119
  return handler(element)
 
120
  html_content = markdown(markdown_text, extensions=['extra', 'sane_lists'])
121
  tree = html.fromstring(html_content)
122
  return [parse_element(element) for element in tree.body]