syntactic_tree / utils.py
nanom's picture
Minor fixes
65fca12
raw
history blame contribute delete
No virus
3.07 kB
import subprocess
import sys
from typing import Tuple

import spacy
from anytree import Node, RenderTree
from spacy import displacy
class Pipeline:
    """Produce HTML and plain-text dependency-tree views of a sentence.

    The sentence is parsed with a spaCy model. The most recently computed
    sentence and its two renderings are cached on the instance, so calling
    ``compute`` again with the same sentence does not re-run the parser.
    """

    # Fine-grained tags that receive the extra "(Plural)" marker in the
    # plain-text tree.
    _PLURAL_TAGS = frozenset({"NNS", "NNPS"})

    def __init__(
        self,
        model: str = "en_core_web_md"
    ) -> None:
        """Load the spaCy model and start with an empty result cache.

        Args:
            model: Name of the spaCy model to load. Defaults to the model
                that was previously hard-coded.
        """
        self.nlp = None
        self.__ch_html_tree = None
        self.__ch_str_tree = None
        self.__ch_sentence = None
        self.__init_nlp(model=model)

    def __init_nlp(
        self,
        model: str
    ) -> None:
        """Load ``model`` into ``self.nlp``, downloading it once if missing.

        Args:
            model: Name of the spaCy model to load.

        Raises:
            OSError: If the model still cannot be loaded after the download
                attempt.
        """
        try:
            self.nlp = spacy.load(model)
        except OSError:
            # Only a load failure triggers the download; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            print(f"* Downloading {model} model...")
            # Run the download with the interpreter executing this code so
            # the model lands in the same environment, and pass an argument
            # list so no shell is involved.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.DEVNULL,
                check=False,
            )
            # A failed download surfaces here as the load error.
            self.nlp = spacy.load(model)

    def __postag(
        self,
        tk: "spacy.tokens.Token"
    ) -> str:
        """Return the tag suffix appended to a token's label in the text tree.

        Args:
            tk: Token-like object exposing a ``tag_`` attribute.

        Returns:
            ``" (TAG)"``, or ``" (TAG) (Plural)"`` when the tag is one of
            ``_PLURAL_TAGS``.
        """
        suffix = " ({})".format(tk.tag_)
        if tk.tag_ in self._PLURAL_TAGS:
            suffix += " (Plural)"
        return suffix

    def __genSyntacticTree(
        self,
        expr: str
    ) -> Tuple[str, str]:
        """Parse ``expr`` and render its dependency structure two ways.

        The plain-text tree is built from the root of the first sentence
        only, while the displaCy rendering receives the whole parsed doc.

        Args:
            expr: Text to parse.

        Returns:
            A ``(html_tree, str_tree)`` tuple: the displaCy dependency
            markup wrapped in a centered, horizontally scrollable container,
            and an anytree text rendering with one line per token.
        """
        doc = self.nlp(expr)
        root = next(doc.sents).root
        root_node = Node(
            f"{root.text}: (Root){self.__postag(root)}",
            parent=None
        )

        def attach_children(
            tk: "spacy.tokens.Token",
            parent_node: "Node",
            depth: int
        ) -> None:
            # Iterating the children of a leaf token yields nothing, so no
            # explicit "has children" check is needed.
            for child in tk.children:
                child_node = Node(
                    f"{child.text}: {depth + 1}{self.__postag(child)}",
                    parent=parent_node
                )
                attach_children(child, child_node, depth + 1)

        attach_children(root, root_node, 0)

        # Distinct loop names keep ``root_node`` from being rebound while
        # rendering.
        syntactic_str_tree = "".join(
            "{}{}\n".format(prefix, rendered.name)
            for prefix, _, rendered in RenderTree(root_node)
        )

        syntactic_tree = displacy.render(
            doc,
            style='dep',
            options={'distance': 100}
        )
        syntactic_html_tree = (
            "\n"
            "<center>\n"
            "<div style='max-width: 800px; overflow-x:auto;'>\n"
            f"{syntactic_tree}\n"
            "</div>\n"
            "</center>\n"
        )
        return syntactic_html_tree, syntactic_str_tree

    def compute(
        self,
        sentence: str
    ) -> Tuple[str, str, str]:
        """Return the error markup and both tree renderings for ``sentence``.

        Args:
            sentence: Text to analyse.

        Returns:
            A ``(error, html_tree, str_tree)`` tuple. For a blank sentence,
            ``error`` holds an HTML warning and both trees are empty strings;
            otherwise ``error`` is ``""``.
        """
        if sentence.strip() == "":
            error_template = (
                "\n"
                "<center>\n"
                "<div class=\"alert alert-warning\" role=\"alert\">\n"
                "<h6><b>{}</b></h6>\n"
                "</div>\n"
                "</center>\n"
            )
            error = error_template.format("The sentence can not be empty!")
            return error, "", ""

        if sentence != self.__ch_sentence:
            # Generate first and commit the cache key afterwards: if parsing
            # raises, the cache must not claim to hold this sentence.
            html_tree, str_tree = self.__genSyntacticTree(sentence)
            self.__ch_sentence = sentence
            self.__ch_html_tree = html_tree
            self.__ch_str_tree = str_tree

        return "", self.__ch_html_tree, self.__ch_str_tree