adorkin committed
Commit 27343ee
Parent: 6e9f410

Rework app.py

Move the interface description and article text out of app.py into description.txt and article.md, load the demo examples from examples.tsv, replace the model-selection Radio with an "Output special symbols" Checkbox, and cache the examples at launch.

Files changed (1):
1. app.py +30 -55
app.py CHANGED
@@ -1,8 +1,16 @@
-from typing import Dict, List
+from typing import Dict, List, Union
 
 import gradio as gr
 from lexenlem.preprocessing.adhoc import AdHocLemmatizer
 
+title = "Lexicon-enhanced lemmatization for Estonian"
+
+with open("./article.md") as file:
+    article: str = file.read()
+
+with open("./description.txt") as file:
+    description: str = file.read()
+
 models: Dict[str, AdHocLemmatizer] = {
     "Lemmatize": AdHocLemmatizer(path="vb_stanza_no_compound_no_deriv.pt", use_stanza=True),
     "Lemmatize with special symbols": AdHocLemmatizer(
@@ -10,71 +18,38 @@ models: Dict[str, AdHocLemmatizer] = {
     )
 }
 
+examples: List[List[Union[str, bool]]] = []
+with open("examples.tsv") as file:
+    for line in file:
+        ex, flag = line.split("\t")
+        flag = bool(int(flag))
+        examples.append(
+            [ex, flag]
+        )
 
-def predict(text: str, model_name: str) -> List[str]:
-    if model_name not in models:
-        raise RuntimeError("Unknown model")
-    return models[model_name](text)
+
+def predict(text: str, output_special_symbols: bool) -> List[str]:
+    if output_special_symbols:
+        return models["Lemmatize with special symbols"](text)
+    else:
+        return models["Lemmatize"](text)
 
 
-gradio_ui = gr.Interface(
+demo = gr.Interface(
     fn=predict,
-    title="Lexicon-enhanced lemmatization for Estonian",
-    description="The purpose of this demo is to demonstrate the results of"
-                " Lexicon-Enhanced Neural Lemmatization for Estonian developed by the TartuNLP research group."
-                " The idea is to utilize the output of an external resource"
-                " (a `lexicon` — the Vabamorf morphological analyzer in this particular case)"
-                " as an additional input to improve the results of a neural lemmatizer model. Said additional input"
-                " is a concatenation of one or more lemma candidates provided by Vabamorf. Morphological features and"
-                " the part of speech are provided by Stanza in this demo, although it's possible to use native Vabamorf"
-                " features as well (the results, however, are going to be slightly worse).\n\n"
-                " The lexicon-enhanced lemmatizer itself is based on an older version of Stanza. The models were"
-                " trained on the Estonian Dependency Treebank version 2.7.\n\n"
-                " Two variants of lemmatization are provided in the demo: regular lemmatization and lemmatization with"
-                " special symbols, which are `=` and `_`, denoting morphological derivation and separating parts of"
-                " compound words respectively. The latter was trained on the original data with Vabamorf set to output"
-                " these special symbols, while the former was trained with `=` and `_` removed from the data and"
-                " Vabamorf output.",
+    title=title,
+    description=description,
+    article=article,
     inputs=[
         gr.inputs.Textbox(lines=7, label="Input text in the box below", placeholder="Text to lemmatize"),
-        gr.inputs.Radio(list(models.keys()), label="Lemmatization type")
+        gr.inputs.Checkbox(label="Output special symbols")
     ],
     outputs=[
         gr.outputs.Textbox()
     ],
-    examples=[
-        [
-            "Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.",
-            "Lemmatize"
-        ],
-        [
-            "Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.",
-            "Lemmatize with special symbols"
-        ],
-        [
-            "Kõik uuritavad võeti vastu TÜ üld- ja molekulaarpatoloogia instituudis inimesegeneetika uurimisrühmas.",
-            "Lemmatize with special symbols"
-        ],
-        [
-            "Peamiselt viimasele toetub ka järgnev artikkel.",
-            "Lemmatize"
-        ],
-        [
-            "Arutletakse selle üle, mida ülearuse rahaga peale hakata.",
-            "Lemmatize"
-        ],
-        [
-            "Väikesele poisile tuuakse apteegist söögiisu tõstmiseks kalamaksaõli.",
-            "Lemmatize"
-        ],
-        [
-            "Tulevased beebid olid justkui peegeldusena pilgu beebisinas ja veel mingi ähmane lubadus.",
-            "Lemmatize"
-        ],
-    ],
+    examples=examples,
    allow_screenshot=False,
    allow_flagging="never",
 )
 
-
-gradio_ui.launch(debug=False, enable_queue=True)
+demo.launch(debug=False, enable_queue=True, cache_examples=True)
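
Note: the new loader expects examples.tsv to hold one tab-separated "<sentence>\t<0|1>" pair per line, where the flag drives the "Output special symbols" checkbox. The sketch below is not part of the commit: the sample rows reuse sentences from the removed inline examples list, and their flags are assumptions about the actual file contents. It writes such a file and parses it back exactly the way the reworked app.py does.

# Minimal sketch of the examples.tsv round trip (sample rows assumed).
from typing import List, Union

rows = [
    ("Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.", 0),
    ("Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.", 1),
    ("Peamiselt viimasele toetub ka järgnev artikkel.", 0),
]

with open("examples.tsv", "w") as file:
    for sentence, flag in rows:
        file.write(f"{sentence}\t{flag}\n")

# Parse it back with the same logic as the reworked app.py.
examples: List[List[Union[str, bool]]] = []
with open("examples.tsv") as file:
    for line in file:
        ex, flag = line.split("\t")             # flag still carries its trailing newline
        examples.append([ex, bool(int(flag))])  # int() tolerates surrounding whitespace

assert examples[1] == ["Ekspositsioonid võiksid alata juba kunstihotellide fuajeedest.", True]

Because int() strips surrounding whitespace, the trailing newline on the flag needs no explicit strip(); a trailing blank line in the file, however, would raise a ValueError at the tuple unpacking.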