CustomizeNER / app.py
tdubon's picture
Update app.py
6e9b9b9
raw
history blame
No virus
2.68 kB
# IPython's `!pip install ...` shell escape is not valid syntax in a plain
# Python script, so the model wheel is installed through the running
# interpreter's own pip instead.
# check=False keeps this best-effort, as a notebook shell escape would be: a
# failed install does not abort here; a missing model then surfaces when the
# pipeline is loaded.
# NOTE(review): consider listing this wheel URL in requirements.txt so the
# install happens at build time rather than on every start-up.
import subprocess
import sys

subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "https://huggingface.co/spacy/en_core_web_md/resolve/main/en_core_web_md-any-py3-none-any.whl",
    ],
    check=False,
)
import gradio as gr
import spacy
from spacy.pipeline import EntityRuler
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
# Load the `en_core_web_md` spaCy pipeline once at import time; the `load`
# callback below uses this shared module-level `nlp` object.
nlp = spacy.load("en_core_web_md")
def _split_phrases(raw):
    """Split the user's entity-reference text into a list of clean phrases.

    Phrases are separated by commas and/or newlines. If the text contains
    neither, it is split on whitespace instead, so a plain space-separated
    list of single-word references still works. Surrounding whitespace is
    stripped from every phrase and empty entries are dropped.
    """
    text = raw.strip()
    if "," in text or "\n" in text:
        pieces = text.replace("\n", ",").split(",")
    else:
        # No explicit separator present: treat each whitespace-delimited
        # word as its own reference.
        pieces = text.split()
    return [piece.strip() for piece in pieces if piece.strip()]


def load(txt1, txt2, txt3, txt4):
    """Run stock NER on one text and a custom phrase-based label on another.

    Args:
        txt1: Text analysed with the unmodified ``nlp`` pipeline.
        txt2: Entity references (phrases) to recognise, separated by commas
            or newlines (whitespace-separated if no comma/newline is present).
        txt3: Label to assign to every matched reference.
        txt4: Text in which the references are searched for.

    Returns:
        A 3-tuple ``(entities, entities2, lab_counts)``:
        * ``entities``   - ``(text, label)`` pairs predicted by the pipeline
          for ``txt1``.
        * ``entities2``  - ``(text, label)`` pairs for the phrase matches
          found in ``txt4``; only custom-label spans are reported here,
          model-predicted entities are not included.
        * ``lab_counts`` - ``collections.Counter`` of the labels in
          ``entities2``.

    The matcher is built and applied per call and the shared ``nlp`` pipeline
    is never modified, so each request uses its own phrases and label and the
    ``txt1`` baseline is not affected by earlier requests.
    """
    from collections import Counter

    # Baseline: whatever the loaded pipeline predicts for the first text.
    baseline_doc = nlp(txt1.strip())
    entities = [(ent.text, ent.label_) for ent in baseline_doc.ents]

    custom_label = txt3.strip()
    phrases = _split_phrases(txt2)

    # Phrase matching only needs tokens, so tokenising is sufficient both for
    # the patterns and for the text being searched.
    target_doc = nlp.make_doc(txt4.strip())

    # Without at least one phrase and a non-empty label there is nothing to
    # annotate; the target document then simply has no entities.
    if phrases and custom_label:
        matcher = PhraseMatcher(nlp.vocab)
        matcher.add(custom_label, [nlp.make_doc(phrase) for phrase in phrases])
        candidates = [
            Span(target_doc, start, end, label=custom_label)
            for _match_id, start, end in matcher(target_doc)
        ]
        # Overlapping or duplicate matches cannot be assigned to doc.ents
        # directly; keep a non-overlapping subset (longest spans first).
        target_doc.ents = spacy.util.filter_spans(candidates)

    entities2 = [(ent.text, ent.label_) for ent in target_doc.ents]
    lab_counts = Counter(label for _text, label in entities2)
    return (entities, entities2, lab_counts)
# Short blurb displayed in the web UI.
description = "Use this space to produce and test your own customized NER"

# Web UI wiring: the four text inputs are passed positionally to
# load(txt1, txt2, txt3, txt4) and its three return values fill the three
# output boxes in order.
# NOTE(review): `gr.inputs` / `gr.outputs` and the `default=` keyword look
# like the older Gradio component API - confirm against the pinned Gradio
# version before upgrading.
iface = gr.Interface(
    title="Customized Named Entity Recognition",
    description=description,
    fn=load,
    interpretation="shap",
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).",
            label="Text to Run through Entity Recognition",
        ),
        gr.inputs.Textbox(
            lines=3,
            default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
            label="Enter entity references",
        ),
        gr.inputs.Textbox(
            lines=1,
            default="COVID",
            label="Enter entity label",
        ),
        gr.inputs.Textbox(
            lines=10,
            default="The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2.",
            label="Enter new sentence containing named entity",
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="str", label="Entities recognized before"),
        # Label spelling corrected ("Entites" -> "Entities").
        gr.outputs.Textbox(type="str", label="Entities recognized after"),
        gr.outputs.Textbox(type="str", label="Count of entities captured for new label"),
    ],
    theme="dark",
)

# Start the web server for the demo.
iface.launch()