CustomizeNER / app.py
tdubon's picture
Update app.py
2c80115
raw
history blame contribute delete
No virus
2.56 kB
import gradio as gr
import spacy
from spacy.pipeline import EntityRuler
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
# Load the `en_core_web_md` spaCy pipeline once at import time; this single
# module-level object is shared by every call to `load` below.
nlp = spacy.load("en_core_web_md")
def load(txt1, txt2, txt3, txt4):
    """Run stock NER on one text and a user-defined phrase matcher on another.

    Args:
        txt1: Text analysed with the unmodified pipeline ("before" view).
        txt2: Entity references to look for. When the value contains a comma
            it is read as comma-separated phrases (multi-word phrases are
            allowed); otherwise it is read as whitespace-separated terms.
        txt3: Label assigned to every phrase match.
        txt4: Text in which the entity references are searched.

    Returns:
        A 3-tuple ``(entities, entities2, lab_counts)``:
        ``entities`` is a list of ``(text, label)`` pairs found in ``txt1``
        by the stock pipeline, ``entities2`` is a list of ``(text, label)``
        pairs for the phrase matches found in ``txt4``, and ``lab_counts``
        is a ``Counter`` of the labels in ``entities2``.
    """
    from collections import Counter

    from spacy.util import filter_spans

    # "Before" view: plain pipeline output for the first text.
    baseline_doc = nlp(str(txt1.strip()))
    entities = [(ent.text, ent.label_) for ent in baseline_doc.ents]

    # Parse the entity references. Splitting on whitespace alone would leave
    # the separating commas glued to each term ("Coronavirus,"), producing
    # patterns that only match when the term is followed by a comma.
    raw_terms = txt2.strip()
    pieces = raw_terms.split(",") if "," in raw_terms else raw_terms.split()
    phrases = [" ".join(piece.split()) for piece in pieces]
    phrases = [phrase for phrase in phrases if phrase]

    user_named = str(txt3.strip())
    apply_doc = nlp(str(txt4.strip()))

    # The matcher is built per call and applied directly to the doc instead
    # of being installed as a pipeline component: a component added to the
    # shared `nlp` object is only added once, so later calls would keep using
    # the first call's patterns and label, and would also overwrite the
    # "before" entities above.
    spans = []
    if phrases:
        matcher = PhraseMatcher(nlp.vocab)
        # Only tokenisation is needed for pattern docs.
        matcher.add(user_named, [nlp.make_doc(phrase) for phrase in phrases])
        spans = [
            Span(apply_doc, start, end, label=user_named)
            for _match_id, start, end in matcher(apply_doc)
        ]

    # As before, the custom matches replace the stock entities on this doc.
    # Overlapping matches are reduced first, because assigning overlapping
    # spans to `doc.ents` raises.
    apply_doc.ents = filter_spans(spans)

    entities2 = [(ent.text, ent.label_) for ent in apply_doc.ents]
    lab_counts = Counter(ent.label_ for ent in apply_doc.ents)
    return (entities, entities2, lab_counts)
# Blurb displayed beneath the page title.
description = "Use this space to produce and test your own customized NER"

# Build the web UI: four text inputs are handed to `load` in order, and its
# three return values fill the three output boxes in order.
# NOTE(review): "Entites" in the second output label is misspelled; it is
# left unchanged here because it is user-facing runtime text.
iface = gr.Interface(
    fn=load,
    title="Customized Named Entity Recognition",
    description=description,
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",
            label="Text to Run through Entity Recognition",
        ),
        gr.inputs.Textbox(
            lines=3,
            default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
            label="Enter entity references",
        ),
        gr.inputs.Textbox(
            lines=1,
            default="COVID",
            label="Enter entity label",
        ),
        gr.inputs.Textbox(
            lines=10,
            default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.",
            label="Enter new sentence containing named entity",
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="str", label="Entities recognized before"),
        gr.outputs.Textbox(type="str", label="Entites recognized after - on new text"),
        gr.outputs.Textbox(type="str", label="Count of entities captured for new label"),
    ],
    theme="dark",
)

# Start the Gradio server.
iface.launch()