crabz committed on
Commit
f1d4807
•
1 Parent(s): 44562bb

output displacy html

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +50 -4
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea/
app.py CHANGED
@@ -1,7 +1,53 @@
1
  import gradio as gr
 
2
 
3
- def greet(name):
4
- return "Ahoj " + name + "!"
5
 
6
- intf = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- intf.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
3
 
4
+ import spacy
5
+ from spacy import displacy
6
 
7
# Maps the value found under a prediction's 'entity' key to an IOB-style tag.
# Index 0 is the "no entity" class; its tag text is the digit '0'.
ner_map = dict(enumerate([
    '0',
    'B-OSOBA',
    'I-OSOBA',
    'B-ORGANIZÁCIA',
    'I-ORGANIZÁCIA',
    'B-LOKALITA',
    'I-LOKALITA',
]))

# Highlight colour per entity label; the same labels are the only ones
# displaCy is asked to render.
_entity_colors = {
    "OSOBA": "lightblue",
    "ORGANIZÁCIA": "lightcoral",
    "LOKALITA": "lightgreen",
}
options = {"ents": list(_entity_colors), "colors": _entity_colors}

# Tokenizer and model are loaded once at import time and shared by every request.
_model_id = "crabz/slovakbert-ner"
tokenizer = AutoTokenizer.from_pretrained(_model_id)
model = AutoModelForTokenClassification.from_pretrained(_model_id)
ner_pipeline = pipeline(task='ner', model=model, tokenizer=tokenizer)

# Blank spaCy pipeline: used only to build a Doc that displaCy can render.
nlp = spacy.blank("en")
26
+
27
+
28
def apply_ner(text: str):
    """Run NER over *text* and return the entities rendered as displaCy HTML.

    Token-level predictions from ``ner_pipeline`` are merged into entity
    spans: a ``B-`` tag opens an entity and every directly following ``I-``
    tag extends it. The merged character ranges are attached to a spaCy
    ``Doc`` and rendered with ``displacy`` using the module-level ``options``.

    Args:
        text: Raw input text to annotate.

    Returns:
        The HTML markup produced by ``displacy.render`` for the annotated text.
    """
    classifications = ner_pipeline(text)

    # Collect (label, start_char, end_char) triples.
    entities = []
    for i, prediction in enumerate(classifications):
        tag = ner_map[prediction['entity']]
        # Only a B- tag opens an entity; class 0 and stray I- tags are skipped.
        if not tag.startswith('B'):
            continue
        j = i + 1
        # Extend over every directly following I- tag (the tag's entity type
        # is not compared with the opening B- tag).
        while j < len(classifications) and ner_map[classifications[j]['entity']].startswith('I'):
            j += 1
        entities.append((tag.split('-')[1], prediction['start'], classifications[j - 1]['end']))

    doc = nlp(text)

    spans = []
    for label, start, end in entities:
        # The model's character offsets need not coincide with spaCy token
        # boundaries. In strict mode char_span() would return None, and
        # assigning None into doc.ents raises. Snap outwards to whole tokens
        # and drop anything that still cannot be mapped.
        span = doc.char_span(start, end, label=label, alignment_mode="expand")
        if span is not None:
            spans.append(span)

    # Snapping can make neighbouring spans overlap, which doc.ents rejects.
    doc.ents = spacy.util.filter_spans(spans)

    return displacy.render(doc, style="ent", options=options)
49
+
50
+
51
# Web UI: one text box in, rendered HTML out, served by apply_ner.
intf = gr.Interface(
    fn=apply_ner,
    inputs="text",
    outputs="html",
    title='Slovak Named Entity Recognition',
    allow_flagging=False,
)

# Start serving the interface.
intf.launch()