Spaces:
Sleeping
Sleeping
AlzbetaStrompova
committed on
Commit
•
081d311
1
Parent(s):
9d2f4c9
change layout
Browse files
- app.py +16 -4
- flagged/log.csv +8 -0
- website_script.py +2 -1
app.py
CHANGED
@@ -15,11 +15,23 @@ examples = [
|
|
15 |
def ner(text):
|
16 |
result = run(tokenizer, model, gazetteers_for_matching, text)
|
17 |
return {"text": text, "entities": result}
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
if __name__ == "__main__":
|
25 |
demo.launch()
|
|
|
15 |
def ner(text):
|
16 |
result = run(tokenizer, model, gazetteers_for_matching, text)
|
17 |
return {"text": text, "entities": result}
|
18 |
+
with gr.Blocks(css="footer{display:none !important}", theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky")) as demo:
|
19 |
+
# with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
20 |
|
21 |
+
gr.Interface(ner,
|
22 |
+
gr.Textbox(lines=5, placeholder="Enter sentence here..."),
|
23 |
+
gr.HighlightedText(show_legend=True, color_map={"PER": "red", "ORG": "green", "LOC": "blue"}),
|
24 |
+
examples=examples,
|
25 |
+
title="NerROB-czech",
|
26 |
+
description="This is an implementation of a Named Entity Recognition model for the Czech language using gazetteers.",
|
27 |
+
allow_flagging="never")
|
28 |
+
|
29 |
+
gr.Interface(ner,
|
30 |
+
gr.File(label="Upload a JSON file"),
|
31 |
+
None,
|
32 |
+
allow_flagging="never",
|
33 |
+
description="Here you can upload your own gazetteers.",
|
34 |
+
)
|
35 |
|
36 |
if __name__ == "__main__":
|
37 |
demo.launch()
|
flagged/log.csv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
text,output,flag,username,timestamp
|
2 |
+
Masarykova univerzita se nachází v Brně .,"[{""token"": """", ""class_or_confidence"": null}, {""token"": ""Masarykova univerzita"", ""class_or_confidence"": ""ORG""}, {""token"": "" se nach\u00e1z\u00ed v "", ""class_or_confidence"": null}, {""token"": ""Brn\u011b"", ""class_or_confidence"": ""LOC""}, {""token"": "" ."", ""class_or_confidence"": null}]",,,2024-05-06 02:29:01.157209
|
3 |
+
Barack Obama navštívil Prahu minulý týden .,"[{""token"": """", ""class_or_confidence"": null}, {""token"": ""Barack Obama"", ""class_or_confidence"": ""OSV""}, {""token"": "" nav\u0161t\u00edvil "", ""class_or_confidence"": null}, {""token"": ""Prahu"", ""class_or_confidence"": ""LOC""}, {""token"": "" minul\u00fd t\u00fdden ."", ""class_or_confidence"": null}]",,,2024-05-06 02:31:57.950478
|
4 |
+
Masarykova univerzita se nachází v Brně .,"[{""token"": """", ""class_or_confidence"": null}, {""token"": ""Masarykova univerzita"", ""class_or_confidence"": ""ORG""}, {""token"": "" se nach\u00e1z\u00ed v "", ""class_or_confidence"": null}, {""token"": ""Brn\u011b"", ""class_or_confidence"": ""LOC""}, {""token"": "" ."", ""class_or_confidence"": null}]",,,2024-05-06 02:51:30.197653
|
5 |
+
Barack Obama navštívil Prahu minulý týden .,,,,2024-05-06 10:58:33.085992
|
6 |
+
Masarykova univerzita se nachází v Brně .,"[{""token"": """", ""class_or_confidence"": null}, {""token"": ""Masarykova univerzita"", ""class_or_confidence"": ""ORG""}, {""token"": "" se nach\u00e1z\u00ed v "", ""class_or_confidence"": null}, {""token"": ""Brn\u011b"", ""class_or_confidence"": ""LOC""}, {""token"": "" ."", ""class_or_confidence"": null}]",,,2024-05-06 11:00:17.762652
|
7 |
+
Masarykova univerzita se nachází v Brně .,"[{""token"": """", ""class_or_confidence"": null}, {""token"": ""Masarykova univerzita"", ""class_or_confidence"": ""ORG""}, {""token"": "" se nach\u00e1z\u00ed v "", ""class_or_confidence"": null}, {""token"": ""Brn\u011b"", ""class_or_confidence"": ""LOC""}, {""token"": "" ."", ""class_or_confidence"": null}]",,,2024-05-06 11:00:20.057269
|
8 |
+
,,,,,2024-05-09 22:59:12.114264
|
website_script.py
CHANGED
@@ -9,10 +9,11 @@ from data_manipulation.preprocess_gazetteers import build_reverse_dictionary
|
|
9 |
def load():
|
10 |
model_name = "ufal/robeczech-base"
|
11 |
model_path = "bettystr/NerRoB-czech"
|
|
|
|
|
12 |
model = ExtendedEmbeddigsRobertaForTokenClassification.from_pretrained(model_path).to("cpu")
|
13 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
14 |
model.eval()
|
15 |
-
gazetteers_path = "gazz2.json"
|
16 |
gazetteers_for_matching = load_gazetteers(gazetteers_path)
|
17 |
temp = []
|
18 |
for i in gazetteers_for_matching.keys():
|
|
|
9 |
def load():
|
10 |
model_name = "ufal/robeczech-base"
|
11 |
model_path = "bettystr/NerRoB-czech"
|
12 |
+
gazetteers_path = "gazz2.json"
|
13 |
+
|
14 |
model = ExtendedEmbeddigsRobertaForTokenClassification.from_pretrained(model_path).to("cpu")
|
15 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
16 |
model.eval()
|
|
|
17 |
gazetteers_for_matching = load_gazetteers(gazetteers_path)
|
18 |
temp = []
|
19 |
for i in gazetteers_for_matching.keys():
|