import gradio as gr
import tabulate
import matplotlib.pyplot as plt
import networkx as nx
from model import Parser
# Single module-level parser instance, created at import time and used by
# parse() for every request.
parser = Parser()
def parse(text):
    """Parse a sentence and build both visualisations for the UI.

    Args:
        text: The raw sentence typed (or selected) by the user.

    Returns:
        A ``(figure, table)`` pair: the dependency-tree figure produced by
        ``render_dependency_tree`` and a dict with ``data`` and ``headers``
        keys for the ``gr.DataFrame`` output.
    """
    output = parser.parse(text)

    dependency_tree = render_dependency_tree(
        output["forms"], output["heads"], output["deprels"]
    )

    # render_table takes six columns; the morphological features were
    # previously omitted, which made every call fail with a TypeError.
    # NOTE(review): assumes the parser output exposes them under "feats",
    # like the other columns - confirm against model.Parser.
    table = render_table(
        output["forms"],
        output["lemmas"],
        output["upos"],
        output["xpos"],
        output["feats"],
        output["ne"],
    )

    # render_table keys its rows as "value" so the dict can be splatted into
    # the gr.DataFrame constructor; a DataFrame *output* value is a dict
    # keyed by "data" and "headers" instead.
    return dependency_tree, {"data": table["value"], "headers": table["headers"]}
def render_dependency_tree(words, parents, labels):
    """Draw the dependency tree of one sentence as a matplotlib figure.

    Args:
        words: Token strings, one per token; used as node labels.
        parents: Head index of each token, 1-based. A value of 0 marks the
            root, for which no incoming edge is drawn.
        labels: Dependency relation of each token, drawn on the edge from
            its head.

    Returns:
        The matplotlib figure holding the drawing.
    """
    fig, ax = plt.subplots(figsize=(32, 16))
    try:
        # Nodes are keyed by 0-based token position so repeated words stay
        # distinct; the word itself is only a display label.
        graph = nx.DiGraph()
        for index, word in enumerate(words):
            graph.add_node(index, label=word)

        # Edges point from head to dependent; heads are 1-based.
        for index, (parent, label) in enumerate(zip(parents, labels)):
            if parent != 0:
                graph.add_edge(parent - 1, index, label=label)

        # Hierarchical top-down layout computed by Graphviz "dot".
        pos = nx.nx_agraph.graphviz_layout(graph, prog='dot')

        nx.draw(
            graph,
            pos,
            ax=ax,
            with_labels=True,
            labels=nx.get_node_attributes(graph, 'label'),
            arrows=True,
            node_color='#ffffff',
            node_size=0,
            node_shape='s',
            font_size=24,
            bbox=dict(facecolor="white", pad=10),
        )

        nx.draw_networkx_edge_labels(
            graph,
            pos,
            ax=ax,
            edge_labels=nx.get_edge_attributes(graph, 'label'),
            rotate=False,
            alpha=0.9,
            font_size=18,
        )
    finally:
        # Unregister the figure from pyplot's global state so figures do not
        # pile up across requests; the Figure object itself stays usable by
        # the caller.
        plt.close(fig)

    return fig
# Header text of the page; passed to gr.HTML as the first element of the UI.
description = """
Norsk UD (Bokmål og Nynorsk)
"""
# Example sentence in CoNLL-U format (ten tab-separated columns per token),
# used to fill the table and the plot with placeholder content at start-up.
# The tabs are written as explicit "\t" escapes so the separators cannot be
# silently turned into spaces by an editor or a copy/paste.
text = "\n".join(
    (
        "1\tPresident\tPresident\tPROPN\tNNP\tNumber=Sing\t5\tnsubj\t5:nsubj\t_",
        "2\tBush\tBush\tPROPN\tNNP\tNumber=Sing\t1\tflat\t1:flat\t_",
        "3\ton\ton\tADP\tIN\t_\t4\tcase\t4:case\t_",
        "4\tTuesday\tTuesday\tPROPN\tNNP\tNumber=Sing\t5\tobl\t5:obl:on\t_",
        "5\tnominated\tnominate\tVERB\tVBD\tMood=Ind|Number=Sing|Person=3|Tense=Past|VerbForm=Fin\t0\troot\t0:root\t_",
        "6\ttwo\ttwo\tNUM\tCD\tNumType=Card\t7\tnummod\t7:nummod\t_",
        "7\tindividuals\tindividual\tNOUN\tNNS\tNumber=Plur\t5\tobj\t5:obj\t_",
        "8\tto\tto\tPART\tTO\t_\t9\tmark\t9:mark\t_",
        "9\treplace\treplace\tVERB\tVB\tVerbForm=Inf\t5\tadvcl\t5:advcl:to\t_",
        "10\tretiring\tretire\tVERB\tVBG\tVerbForm=Ger\t11\tamod\t11:amod\t_",
        "11\tjurists\tjurist\tNOUN\tNNS\tNumber=Plur\t9\tobj\t9:obj\t_",
        "12\ton\ton\tADP\tIN\t_\t14\tcase\t14:case\t_",
        "13\tfederal\tfederal\tADJ\tJJ\tDegree=Pos\t14\tamod\t14:amod\t_",
        "14\tcourts\tcourt\tNOUN\tNNS\tNumber=Plur\t11\tnmod\t11:nmod:on\t_",
        "15\tin\tin\tADP\tIN\t_\t18\tcase\t18:case\t_",
        "16\tthe\tthe\tDET\tDT\tDefinite=Def|PronType=Art\t18\tdet\t18:det\t_",
        "17\tWashington\tWashington\tPROPN\tNNP\tNumber=Sing\t18\tcompound\t18:compound\t_",
        "18\tarea\tarea\tNOUN\tNN\tNumber=Sing\t14\tnmod\t14:nmod:in\tSpaceAfter=No",
        "19\t.\t.\tPUNCT\t.\t_\t5\tpunct\t5:punct\t_",
    )
)

# Split every token line once (skipping blank lines and "#" comment lines)
# and read the individual columns from the pre-split rows.
_rows = [
    line.split("\t")
    for line in text.split("\n")
    if line and not line.startswith("#")
]

forms = [row[1] for row in _rows]
lemmas = [row[2] for row in _rows]
upos = [row[3] for row in _rows]
xpos = [row[4] for row in _rows]
feats = [row[5] for row in _rows]
metadata = [row[9] for row in _rows]
# Head indices are 1-based; 0 marks the root token.
edges = [int(row[6]) for row in _rows]
edge_labels = [row[7] for row in _rows]
def _format_feats(feat):
    """Turn one ``A=x|B=y`` feature string into ``["*A:* x", "*B:* y"]``."""
    return [
        f"*{pair.split('=')[0]}:* {pair.split('=')[1]}"
        for pair in feat.split("|")
        if '=' in pair
    ]


def _format_entity(tags, i):
    """Render the named-entity tag at position *i* of *tags* as a table cell."""
    tag = tags[i]
    # "O" and anything that is not of the PREFIX-TYPE shape (e.g. "_") is
    # treated as "no entity" instead of raising on the missing type part.
    if tag == "O" or "-" not in tag:
        return ""

    entity_type = tag.split('-')[1]
    if tag.startswith("B"):
        # NOTE(review): a B- tag is always rendered with both brackets, even
        # when an I- tag follows - confirm this is the intended display.
        return f"<< {entity_type} >>"

    continues = i + 1 < len(tags) and tags[i + 1].startswith("I")
    if tag.startswith("I") and continues:
        return entity_type
    # Last token of a span: close the bracket.
    return f"{entity_type} >>"


def render_table(forms, lemmas, upos, xpos, feats, named_entities):
    """Build the token table shown in the ``gr.DataFrame``.

    Every argument is a sequence with one entry per token.

    Args:
        forms: Word forms; they become the column headers.
        lemmas: Lemmas.
        upos: Universal POS tags.
        xpos: Language-specific POS tags.
        feats: Morphological features as ``Name=Value`` pairs joined by
            ``|``; entries without ``=`` contribute no features.
        named_entities: Entity tags such as ``B-PER`` / ``I-PER`` / ``O``.

    Returns:
        A dict with ``"headers"`` (the forms, preceded by an empty corner
        cell) and ``"value"`` (the rows: lemmas, UPOS, XPOS, one row per
        feature slot, named entities). Each row starts with its label.
    """
    feature_cells = [_format_feats(feat) for feat in feats]

    # Pad every token to the same number of feature slots (at least one, so
    # the "*UFEATS:*" row always exists), then transpose tokens -> rows.
    width = max(1, max((len(cells) for cells in feature_cells), default=0))
    padded = [cells + [""] * (width - len(cells)) for cells in feature_cells]
    feature_rows = list(zip(*padded)) or [()]

    entity_cells = [
        _format_entity(named_entities, i) for i in range(len(named_entities))
    ]

    array = [
        ["", *forms],
        ["*LEMMAS:*", *lemmas],
        ["*UPOS:*", *upos],
        ["*XPOS:*", *xpos],
        ["*UFEATS:*", *feature_rows[0]],
        *(["", *row] for row in feature_rows[1:]),
        ["*NE:*", *entity_cells],
    ]
    return {"value": array[1:], "headers": array[0]}
custom_css = """
/* Hide sort buttons at gr.DataFrame */
.sort-button {
display: none !important;
}
"""

# Page layout: header, an input row (textbox + button next to clickable
# examples), then the token table and the dependency plot, both pre-filled
# from the bundled sample sentence.
with gr.Blocks(theme='sudeepshouche/minimalist', css=custom_css) as demo:
    gr.HTML(description)

    with gr.Row():
        with gr.Column(scale=1, variant="panel"):
            source = gr.Textbox(
                label="Input sentence",
                placeholder="Write a sentence to parse",
                show_label=False,
                lines=1,
                max_lines=5,
                autofocus=True,
            )
            submit = gr.Button("Submit", variant="primary")

        with gr.Column(scale=1, variant="panel"):
            dataset = gr.Dataset(
                components=[gr.Textbox(visible=False)],
                label="Input examples",
                samples=[
                    ["Thomassen er på vei til sin neste gjerning."],
                    ["På toppen av dette kom de metodiske utfordringer."],
                    ["Berntsen har påtatt seg en både viktig og vanskelig oppgave."],
                    ["Ikke bare har det vært et problem, som han selv skriver i forordet, å bli klok på Borten."],
                ],
            )

    # render_table takes exactly six columns. The sample sentence carries no
    # named-entity annotation, so every token is tagged "O" (no entity).
    table = gr.DataFrame(
        **render_table(forms, lemmas, upos, xpos, feats, ["O"] * len(forms)),
        interactive=False,
        datatype="markdown",
    )
    dependency_plot = gr.Plot(
        render_dependency_tree(forms, edges, edge_labels), container=False
    )

    # Event listeners must reference the component objects themselves, not
    # their variable names as strings.
    source.submit(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    submit.click(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )
    # Clicking an example copies it into the textbox, then parses it.
    # NOTE(review): assumes gr.Dataset passes the clicked row as a list (one
    # entry per component) - the first entry is unwrapped for the textbox.
    dataset.click(
        fn=lambda sample: sample[0] if isinstance(sample, (list, tuple)) else sample,
        inputs=[dataset],
        outputs=[source],
    ).then(
        fn=parse, inputs=[source], outputs=[dependency_plot, table], queue=True
    )

# NOTE(review): `concurrency_count` is a Gradio 3.x argument; newer major
# versions replaced it - confirm against the pinned Gradio version.
demo.queue(max_size=32, concurrency_count=2)
demo.launch()