CustomizeNER / app.py
tdubon's picture
Update app.py
fee7a69
raw
history blame
2.57 kB
import gradio as gr
import spacy
from spacy.pipeline import EntityRuler
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
# Name of the spaCy pipeline package loaded once at import time and shared
# by every request handled by `load`.
_MODEL_NAME = "en_core_web_md"
nlp = spacy.load(_MODEL_NAME)
def _split_references(text):
    """Split the user's entity references into individual, non-empty terms.

    When the text contains a comma, commas are the separator (so a
    multi-word reference stays in one piece); otherwise the text is split
    on whitespace. Surrounding whitespace is removed from every term and
    empty terms are dropped.
    """
    stripped = text.strip()
    pieces = stripped.split(",") if "," in stripped else stripped.split()
    return [piece.strip() for piece in pieces if piece.strip()]


def load(txt1, txt2, txt3, txt4):
    """Run the stock pipeline on one text and a user-defined label on another.

    Args:
        txt1: Text analysed with the shared `nlp` pipeline as-is.
        txt2: Entity references to look for, separated by commas (or by
            whitespace when no comma is present).
        txt3: Label given to every span that matches a reference.
        txt4: Text in which the references are searched.

    Returns:
        A 3-tuple ``(entities, entities2, lab_counts)``:
        ``entities`` is a list of ``(text, label)`` pairs found in `txt1`,
        ``entities2`` is a list of ``(text, label)`` pairs for the reference
        matches found in `txt4`, and ``lab_counts`` is a `Counter` of the
        labels in ``entities2``.
    """
    from collections import Counter

    baseline_doc = nlp(txt1.strip())
    entities = [(ent.text, ent.label_) for ent in baseline_doc.ents]

    # Build a fresh matcher on every call. The shared `nlp` pipeline is
    # deliberately left unmodified: a component registered once would keep
    # using the references and label of the first request for all later ones,
    # and would also alter the "before" results above.
    user_named = txt3.strip()
    matcher = PhraseMatcher(nlp.vocab)
    # Only tokenisation is needed for phrase patterns, so skip the full pipeline.
    patterns = [nlp.make_doc(term) for term in _split_references(txt2)]
    if patterns:
        matcher.add(user_named, patterns)

    apply_doc = nlp(txt4.strip())
    spans = [
        Span(apply_doc, start, end, label=user_named)
        for _match_id, start, end in matcher(apply_doc)
    ]
    # The matched spans replace whatever entities the pipeline predicted.
    # filter_spans drops duplicate/overlapping matches (keeping the longest),
    # because assigning overlapping spans to doc.ents raises an error.
    apply_doc.ents = spacy.util.filter_spans(spans)

    entities2 = [(ent.text, ent.label_) for ent in apply_doc.ents]
    lab_counts = Counter(ent.label_ for ent in apply_doc.ents)
    return (entities, entities2, lab_counts)
# Short blurb displayed with the interface title.
description = "Use this space to produce and test your own customized NER"

# Input widgets, listed in the same order as the parameters of `load`.
_input_boxes = [
    gr.inputs.Textbox(
        lines=10,
        default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",
        label="Text to Run through Entity Recognition",
    ),
    gr.inputs.Textbox(
        lines=3,
        default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
        label="Enter entity references",
    ),
    gr.inputs.Textbox(
        lines=1,
        default="COVID",
        label="Enter entity label",
    ),
    gr.inputs.Textbox(
        lines=10,
        default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.",
        label="Enter new sentence containing named entity",
    ),
]

# Output widgets, one per element of the tuple returned by `load`.
_output_boxes = [
    gr.outputs.Textbox(type="str", label="Entities recognized before"),
    gr.outputs.Textbox(type="str", label="Entites recognized after"),
    gr.outputs.Textbox(type="str", label="Count of entities captured for new label"),
]

iface = gr.Interface(
    fn=load,
    inputs=_input_boxes,
    outputs=_output_boxes,
    title="Customized Named Entity Recognition",
    description=description,
    interpretation="shap",
    theme="dark",
)

# Start the web server for the interface.
iface.launch()