Spaces:
Runtime error
Runtime error
Rework app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,16 @@
|
|
1 |
-
from typing import Dict, List
|
2 |
|
3 |
import gradio as gr
|
4 |
from lexenlem.preprocessing.adhoc import AdHocLemmatizer
|
5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
models: Dict[str, AdHocLemmatizer] = {
|
7 |
"Lemmatize": AdHocLemmatizer(path="vb_stanza_no_compound_no_deriv.pt", use_stanza=True),
|
8 |
"Lemmatize with special symbols": AdHocLemmatizer(
|
@@ -10,71 +18,38 @@ models: Dict[str, AdHocLemmatizer] = {
|
|
10 |
)
|
11 |
}
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
def predict(text: str, model_name: str) -> List[str]:
    """Run the lemmatizer registered under ``model_name`` on ``text``.

    Args:
        text: Input text to lemmatize.
        model_name: Key of the model in the module-level ``models`` mapping.

    Returns:
        The result of calling the selected lemmatizer on ``text``.

    Raises:
        RuntimeError: If ``model_name`` is not a key of ``models``.
    """
    if model_name in models:
        lemmatizer = models[model_name]
        return lemmatizer(text)
    raise RuntimeError("Unknown model")
|
18 |
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
|
|
21 |
fn=predict,
|
22 |
-
title=
|
23 |
-
description=
|
24 |
-
|
25 |
-
" The idea is to utilize the input of an external resource"
|
26 |
-
" (a `lexicon` — Vabamorf morphological analyzer in this particular case)"
|
27 |
-
" as an additional input to improve the results of a neural lemmatizer model. Said additional input"
|
28 |
-
" is a concatenation of one or more lemma candidates provided by Vabamorf. Morphological features and"
|
29 |
-
" the part of speech are provided by Stanza in this demo, although it's possible to use native Vabamorf"
|
30 |
-
" features as well (the results, however, are going to be slightly worse).\n\n"
|
31 |
-
" The lexicon-enhanced lemmatizer itself is based on an older version of Stanza. The models were"
|
32 |
-
" trained on the Estonian Dependency Treebank version 2.7.\n\n"
|
33 |
-
" Two variants of lemmatization are provided in the demo: regular lemmatization and lemmatization with"
|
34 |
-
" special symbols, which are `=` and `_`, denoting morphological derivation and separating parts of"
|
35 |
-
" compound words respectively. The latter was trained on the original data with Vabamorf set to output"
|
36 |
-
" these special symbols, while the latter was trained with `=` and `_` removed from the data and"
|
37 |
-
" vabamorf output.",
|
38 |
inputs=[
|
39 |
gr.inputs.Textbox(lines=7, label="Input text in the box below", placeholder="Text to lemmatize"),
|
40 |
-
gr.inputs.
|
41 |
],
|
42 |
outputs=[
|
43 |
gr.outputs.Textbox()
|
44 |
],
|
45 |
-
examples=
|
46 |
-
[
|
47 |
-
"Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.",
|
48 |
-
"Lemmatize"
|
49 |
-
],
|
50 |
-
[
|
51 |
-
"Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.",
|
52 |
-
"Lemmatize with special symbols"
|
53 |
-
],
|
54 |
-
[
|
55 |
-
"Kõik uuritavad võeti vastu TÜ üld- ja molekulaarpatoloogia instituudis inimesegeneetika uurimisrühmas.",
|
56 |
-
"Lemmatize with special symbols"
|
57 |
-
],
|
58 |
-
[
|
59 |
-
"Peamiselt viimasele toetub ka järgnev artikkel.",
|
60 |
-
"Lemmatize"
|
61 |
-
],
|
62 |
-
[
|
63 |
-
"Arutletakse selle üle, mida ülearuse rahaga peale hakata.",
|
64 |
-
"Lemmatize"
|
65 |
-
],
|
66 |
-
[
|
67 |
-
"Väikesele poisile tuuakse apteegist söögiisu tõstmiseks kalamaksaõli.",
|
68 |
-
"Lemmatize"
|
69 |
-
],
|
70 |
-
[
|
71 |
-
"Tulevased beebid olid justkui peegeldusena pilgu beebisinas ja veel mingi ähmane lubadus.",
|
72 |
-
"Lemmatize"
|
73 |
-
],
|
74 |
-
],
|
75 |
allow_screenshot=False,
|
76 |
allow_flagging="never",
|
77 |
)
|
78 |
|
79 |
-
|
80 |
-
gradio_ui.launch(debug=False, enable_queue=True)
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
|
3 |
import gradio as gr
|
4 |
from lexenlem.preprocessing.adhoc import AdHocLemmatizer
|
5 |
|
6 |
+
# Heading shown at the top of the demo page.
title = "Lexicon-enhanced lemmatization for Estonian"

# Long-form texts are kept in separate files and read once at start-up.
# The encoding is pinned to UTF-8 so the result does not depend on the
# locale of the machine running the app.
with open("./article.md", encoding="utf-8") as file:
    article: str = file.read()

with open("./description.txt", encoding="utf-8") as file:
    description: str = file.read()
|
13 |
+
|
14 |
models: Dict[str, AdHocLemmatizer] = {
|
15 |
"Lemmatize": AdHocLemmatizer(path="vb_stanza_no_compound_no_deriv.pt", use_stanza=True),
|
16 |
"Lemmatize with special symbols": AdHocLemmatizer(
|
|
|
18 |
)
|
19 |
}
|
20 |
|
21 |
+
# Example rows offered in the demo UI. Each non-empty line of examples.tsv
# is "<text>\t<flag>", where <flag> is an integer converted to the boolean
# fed to the second input (the "Output special symbols" checkbox).
examples: List[List[Union[str, bool]]] = []
# Pin the encoding: the demo targets Estonian, so non-ASCII text is expected
# and the platform default encoding is not guaranteed to be UTF-8.
with open("examples.tsv", encoding="utf-8") as file:
    for line in file:
        line = line.rstrip("\r\n")
        if not line:
            # Tolerate blank lines (e.g. an empty last line) instead of
            # failing on the two-value unpack below.
            continue
        # Split on the last tab only, so the flag is always the final field.
        ex, flag = line.rsplit("\t", 1)
        examples.append([ex, bool(int(flag))])
|
29 |
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
def predict(text: str, output_special_symbols: bool) -> List[str]:
    """Lemmatize ``text`` with the model matching the checkbox state.

    Args:
        text: Input text to lemmatize.
        output_special_symbols: When true, the "Lemmatize with special
            symbols" model is used; otherwise the plain "Lemmatize" model.

    Returns:
        The result of calling the selected lemmatizer on ``text``.
    """
    model_name = "Lemmatize with special symbols" if output_special_symbols else "Lemmatize"
    lemmatizer = models[model_name]
    return lemmatizer(text)
|
36 |
|
37 |
+
|
38 |
+
# Assemble the Gradio UI: a text box and a checkbox as inputs, one text box
# as output, all wired to predict().
demo = gr.Interface(
    fn=predict,
    title=title,
    description=description,
    article=article,
    inputs=[
        # First positional argument of predict(): the text to lemmatize.
        gr.inputs.Textbox(
            lines=7,
            label="Input text in the box below",
            placeholder="Text to lemmatize",
        ),
        # Second positional argument of predict(): output_special_symbols.
        gr.inputs.Checkbox(label="Output special symbols"),
    ],
    outputs=[
        gr.outputs.Textbox(),
    ],
    examples=examples,
    allow_screenshot=False,
    allow_flagging="never",
)

# Launch the app with queueing enabled and example caching requested.
demo.launch(debug=False, enable_queue=True, cache_examples=True)
|
|