better postprocessing
Browse files
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import os
|
3 |
|
4 |
-
os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
|
5 |
-
|
6 |
|
7 |
import gradio as gr
|
8 |
from transformers import pipeline
|
@@ -29,9 +29,7 @@ ner_pipeline = pipeline(task='ner', model="crabz/slovakbert-ner")
|
|
29 |
nlp = spacy.blank("sk")
|
30 |
|
31 |
|
32 |
-
def
|
33 |
-
classifications = ner_pipeline(sentence)
|
34 |
-
|
35 |
entities = []
|
36 |
for i in range(len(classifications)):
|
37 |
if classifications[i]['entity'] != 0:
|
@@ -41,13 +39,37 @@ def apply_ner(sentence: str):
|
|
41 |
j += 1
|
42 |
entities.append((ner_map[classifications[i]['entity']].split('-')[1], classifications[i]['start'],
|
43 |
classifications[j - 1]['end']))
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
|
|
|
|
|
|
46 |
ents = []
|
47 |
for ee in entities:
|
48 |
ents.append(doc.char_span(ee[1], ee[2], ee[0]))
|
49 |
doc.ents = ents
|
|
|
|
|
50 |
|
|
|
|
|
|
|
|
|
51 |
displacy_html = displacy.render(doc, style="ent", options=options)
|
52 |
return displacy_html
|
53 |
|
@@ -57,9 +79,14 @@ intf = gr.Interface(fn=apply_ner, inputs="text", outputs="html", title='Slovak N
|
|
57 |
examples=[["Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
|
58 |
"štyroch prípadov variantu omikron na Slovensku."],
|
59 |
["Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
|
60 |
-
"rovnako\"."]
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
64 |
article="")
|
65 |
intf.launch()
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import os
|
3 |
|
4 |
+
# os.system("pip3 install torch==1.10.1+cpu torchvision==0.11.2+cpu torchaudio==0.10.1+cpu -f "
|
5 |
+
# "https://download.pytorch.org/whl/cpu/torch_stable.html")
|
6 |
|
7 |
import gradio as gr
|
8 |
from transformers import pipeline
|
|
|
29 |
nlp = spacy.blank("sk")
|
30 |
|
31 |
|
32 |
+
def postprocess(classifications):
|
|
|
|
|
33 |
entities = []
|
34 |
for i in range(len(classifications)):
|
35 |
if classifications[i]['entity'] != 0:
|
|
|
39 |
j += 1
|
40 |
entities.append((ner_map[classifications[i]['entity']].split('-')[1], classifications[i]['start'],
|
41 |
classifications[j - 1]['end']))
|
42 |
+
to_remove = []
|
43 |
+
merged_entities = []
|
44 |
+
for i in range(len(entities)):
|
45 |
+
for j in range(i + 1, len(entities)):
|
46 |
+
if entities[i] != entities[j] and entities[i][0] == entities[j][0] and (entities[i][2] == entities[j][1] or
|
47 |
+
entities[i][1] == entities[j][2]):
|
48 |
+
to_remove.append(entities[i])
|
49 |
+
to_remove.append(entities[j])
|
50 |
+
|
51 |
+
new_start = min(entities[i][1], entities[j][1])
|
52 |
+
new_end = max(entities[i][2], entities[j][2])
|
53 |
+
merged_entities.append((entities[i][0], new_start, new_end))
|
54 |
+
for ent in to_remove:
|
55 |
+
entities.remove(ent)
|
56 |
+
entities += merged_entities
|
57 |
+
return entities
|
58 |
|
59 |
+
|
60 |
+
def set_entities(sentence, entities):
    """Build a spaCy ``Doc`` for *sentence* and attach *entities* to it.

    Args:
        sentence: Raw input text; tokenized with the module-level ``nlp``.
        entities: Iterable of ``(label, start_char, end_char)`` tuples whose
            offsets are character positions in *sentence*.

    Returns:
        The ``Doc`` with ``doc.ents`` set to every span that could be
        resolved from the given character offsets.
    """
    doc = nlp(sentence)
    spans = []
    for label, start, end in entities:
        span = doc.char_span(start, end, label)
        # char_span yields None when the offsets do not line up with token
        # boundaries; putting None into doc.ents would raise, so skip it.
        if span is not None:
            spans.append(span)
    doc.ents = spans
    return doc
|
67 |
+
|
68 |
|
69 |
+
def apply_ner(sentence: str):
    """Run the NER pipeline on *sentence* and return displaCy entity markup.

    The raw classifications are post-processed into entity tuples, attached
    to a spaCy ``Doc``, and rendered with the "ent" displaCy style.
    """
    doc = set_entities(sentence, postprocess(ner_pipeline(sentence)))
    return displacy.render(doc, style="ent", options=options)
|
75 |
|
|
|
79 |
examples=[["Laboratóriá Úradu verejného zdravotníctva sekvenovaním potvrdili výskyt ďalších "
|
80 |
"štyroch prípadov variantu omikron na Slovensku."],
|
81 |
["Čaputová opakovane tvrdí, že \"spravodlivosť na Slovensku neplatí vždy pre všetkých "
|
82 |
+
"rovnako\"."],
|
83 |
+
["Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič "
|
84 |
+
"upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké."],
|
85 |
+
["Začiatkom roka sa objavili nezhody medzi Richardom Sulíkom a šéfom hnutia OĽANO "
|
86 |
+
"Igorom Matovičom, ktoré v istej miere pretrvávajú aj dodnes."]],
|
87 |
+
description="Named-entity recognition (NER) labels named-entities in unstructured text. This "
|
88 |
+
"implementation supports three labels: person (OSOBA), organization (ORGANIZÁCIA) and "
|
89 |
+
"location (LOKALITA). You can try out one of the examples below or type your own "
|
90 |
+
"sentence. Don't forget to use double quotes (\" \") instead of curved quotes („ “)",
|
91 |
article="")
|
92 |
intf.launch()
|