Spaces:

bettystr
/

NerRoB-czech

Sleeping

AlzbetaStrompova committed on May 5, 2024

Commit

f3898ef

1 Parent(s): 2a9fe7e

change output

Files changed (2) hide show

app.py CHANGED Viewed

@@ -6,7 +6,11 @@ tokenizer, model, gazetteers_for_matching = load()
 print("Loaded model")
 examples = [
-    "Masarykova univerzita",
 ]
 def ner(text):
@@ -15,8 +19,7 @@ def ner(text):
 demo = gr.Interface(ner,
              gr.Textbox(placeholder="Enter sentence here..."),
-             "textbox",
-             #gr.HighlightedText(), # TODO https://www.gradio.app/guides/named-entity-recognition
              examples=examples)
 if __name__ == "__main__":

 print("Loaded model")
 examples = [
+    "Masarykova univerzita se nachází v Brně.",
+    "Barack Obama navštívil Prahu minulý týden.",
+    "Angela Merkelová se setkala s francouzským prezidentem v Paříži.",
+    "Karel Čapek napsal knihu R.U.R., která byla poprvé představena v Praze.",
+    "Nobelova cena za fyziku byla udělena týmu vědců z MIT."
 ]
 def ner(text):
 demo = gr.Interface(ner,
              gr.Textbox(placeholder="Enter sentence here..."),
+             gr.HighlightedText(show_legend=True,),
              examples=examples)
 if __name__ == "__main__":

website_script.py CHANGED Viewed

@@ -44,4 +44,16 @@ def run(tokenizer, model, gazetteers_for_matching, text):
     output = model(input_ids=input_ids, attention_mask=attention_mask, per=per, org=org, loc=loc).logits
     predictions = torch.argmax(output, dim=2).tolist()
     predicted_tags = [[model.config.id2label[idx] for idx in sentence] for sentence in predictions]
-    return " ".join(predicted_tags[0])

     output = model(input_ids=input_ids, attention_mask=attention_mask, per=per, org=org, loc=loc).logits
     predictions = torch.argmax(output, dim=2).tolist()
     predicted_tags = [[model.config.id2label[idx] for idx in sentence] for sentence in predictions]
+    softmax = torch.nn.Softmax(dim=2)
+    scores = softmax(output).squeeze(0).tolist()
+    result = []
+    for pos, entity, score in zip(tokenized_inputs.offset_mapping, predicted_tags[0], scores):
+        result.append({
+            "start": pos[0],
+            "end": pos[1],
+            "entity": entity,
+            "score": max(score),
+            "word": text[pos[0]:pos[1]],
+        })
+    return result