CustomizeNER / app.py
tdubon's picture
Create app.py
70cda52
raw
history blame
4.73 kB
from collections import Counter

import gradio as gr
import spacy
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.pipeline import EntityRuler
from spacy.tokens import Span
# Shared spaCy pipeline used by the step functions in this file. Loading
# needs the "en_core_web_md" model package to be installed.
nlp = spacy.load("en_core_web_md")
#Text 1
def process_text(text1):
    """Run the shared pipeline over *text1* and report the entities found.

    Every entity is printed with its label, then every label is printed
    with the description returned by ``spacy.explain``.

    Args:
        text1: Raw text to analyse.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entity,
        in the order they appear in ``doc.ents``.
    """
    doc1 = load(text1)

    # Entities captured by the pipeline.
    for ent in doc1.ents:
        print(ent.text, ent.label_)

    # Labels together with their meaning.
    for ent in doc1.ents:
        print(ent.label_, spacy.explain(ent.label_))

    return [(ent.text, ent.label_) for ent in doc1.ents]


def load(text):
    """Strip *text* and parse it with the shared ``nlp`` pipeline.

    Args:
        text: Raw text; ``None`` is treated as an empty string.

    Returns:
        The ``Doc`` produced by ``nlp``.
    """
    user_input = "" if text is None else str(text).strip()
    return nlp(user_input)
#Text 2
def entities(text2):
    """Build phrase patterns from the reference terms in *text2*.

    Args:
        text2: Reference terms separated by commas and/or whitespace.

    Returns:
        A list with the text of each pattern that was built.
    """
    phrase_docs = named_ents(text2)
    print("patterns:", phrase_docs)
    return [phrase_doc.text for phrase_doc in phrase_docs]


def named_ents(text):
    """Turn each reference term in *text* into a pattern ``Doc``.

    Commas are treated as separators in addition to whitespace, so a
    comma-separated entry such as ``"Coronavirus, COVID-19"`` does not leave
    a trailing comma attached to a term.

    The resulting list is also stored in the module-level name ``patterns``,
    because ``pipe`` reads ``patterns`` from module scope.

    Args:
        text: Reference terms; ``None`` is treated as an empty string.

    Returns:
        The list of pattern ``Doc`` objects.
    """
    global patterns
    pattern_list = str(text or "").replace(",", " ").split()
    patterns = list(nlp.pipe(pattern_list))
    return patterns
#Text 3
def run(text3):
    """Add the custom entity component for the label *text3* to the pipeline.

    Args:
        text3: Label to assign to every matched phrase.

    Returns:
        The pipeline's component names after the component has been added.
    """
    pipe(text3)
    return list(nlp.pipe_names)


def pipe(text):
    """Register a phrase-matching component and insert it after ``"ner"``.

    The component matches the module-level ``patterns`` (a list of pattern
    ``Doc`` objects) and overwrites ``doc.ents`` with the matched spans,
    labelled with the user-supplied label.

    Args:
        text: Label for the matched spans; surrounding whitespace is removed.

    Returns:
        The name under which the component was added to the pipeline.

    Raises:
        ValueError: If the label is empty or no patterns are available.
    """
    user_named = "" if text is None else str(text).strip()
    if not user_named:
        raise ValueError("Entity label must not be empty.")

    # `patterns` lives at module scope; look it up defensively so a missing
    # or empty list yields a clear message instead of a NameError.
    phrase_docs = globals().get("patterns")
    if not phrase_docs:
        raise ValueError(
            "No entity references available; enter the reference terms first."
        )

    matcher = PhraseMatcher(nlp.vocab)
    matcher.add(user_named, phrase_docs)

    # The name used for registration must be the same one passed to
    # nlp.add_pipe, otherwise spaCy cannot find the component.
    component_name = user_named + "component"

    @Language.component(component_name)
    def covid_component_function(doc):
        # Apply the matcher to the doc
        matches = matcher(doc)
        # Create a Span for each match and assign the user-supplied label
        spans = [
            Span(doc, start, end, label=user_named)
            for _match_id, start, end in matches
        ]
        # Overwrite doc.ents with the matched spans; overlapping matches are
        # reduced first because doc.ents rejects overlapping spans.
        doc.ents = spacy.util.filter_spans(spans)
        return doc

    # A repeated call with the same label would otherwise try to add a
    # component name that is already in the pipeline.
    if component_name in nlp.pipe_names:
        nlp.remove_pipe(component_name)

    # Add the component to the pipeline after the "ner" component
    nlp.add_pipe(component_name, after="ner")
    print(nlp.pipe_names)
    return component_name
#Text 4
def test(text4):
    """Run the current pipeline on *text4* and report the entities found.

    The entity list and a per-label count are printed.

    Args:
        text4: New sentence to check for the named entity.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, one per entity,
        in the order they appear in ``doc.ents``.
    """
    apply_doc = new_sample(text4)
    found = [(ent.text, ent.label_) for ent in apply_doc.ents]
    print(found)

    # Count total mentions of each label in the document
    labels = [label for _text, label in found]
    print(Counter(labels))
    return found


def new_sample(text):
    """Strip *text* and parse it with the shared ``nlp`` pipeline.

    Args:
        text: Raw text; ``None`` is treated as an empty string.

    Returns:
        The ``Doc`` produced by ``nlp``.
    """
    user_doc = "" if text is None else str(text).strip()
    return nlp(user_doc)
#user_input = input(str("")) #gradio text box here to enter sample text
#doc1 = nlp(user_input)
#print list of entities captured by pretrained model
#for ent in doc1.ents:
#print(ent.text, ent.label_)
#inspect labels and their meaning
#for ent in doc1.ents:
#print(ent.label_, spacy.explain(ent.label_))
#Use PhraseMatcher to find all references of interest
#Define the different references to Covid
#user_entries = input(str("")) #gradio text box here to enter sample terms
#pattern_list = []
#for i in user_entries.strip().split():
# pattern_list.append(i)
#patterns = list(nlp.pipe(pattern_list))
#print("patterns:", patterns)
#Instantiate PhraseMatcher
#matcher = PhraseMatcher(nlp.vocab)
#Create label for pattern
#user_named = input(str("").strip()) #gradio text box here to enter pattern label
#matcher.add(user_named, patterns)
# Define the custom component
#@Language.component("covid_component")
#def covid_component_function(doc):
# Apply the matcher to the doc
# matches = matcher(doc)
# Create a Span for each match and assign the user-supplied label
# spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
# Overwrite the doc.ents with the matched spans
# doc.ents = spans
# return doc
# Add the component to the pipeline after the "ner" component
#nlp.add_pipe((user_named + "component"), after="ner")
#print(nlp.pipe_names)
#Verify that your model now detects all specified mentions of Covid on another text
#user_doc = input(str("").strip())
#apply_doc = nlp(user_doc)
#print([(ent.text, ent.label_) for ent in apply_doc.ents])
#Count total mentions of label COVID in the 3rd document
#from collections import Counter
#labels = [ent.label_ for ent in apply_doc.ents]
#Counter(labels)
def _run_all_steps(text1, text2, text3, text4):
    """Run the four steps in order and return the two entity listings.

    The order matters: the reference terms (step 2) are needed before the
    component is added (step 3), and the component must be in the pipeline
    before the test sentence is analysed (step 4).

    Args:
        text1: Text to run through entity recognition.
        text2: Entity reference terms.
        text3: Entity label.
        text4: Test sentence containing the named entity.

    Returns:
        A pair: the result of ``process_text(text1)`` and the result of
        ``test(text4)``.
    """
    # NOTE(review): assumes process_text and test return data that
    # HighlightedText can render (a list of (text, label) pairs) - confirm.
    baseline_entities = process_text(text1)
    entities(text2)
    run(text3)
    custom_entities = test(text4)
    return baseline_entities, custom_entities


# One interface: four text inputs (one per step) and two highlighted outputs
# (entities before and after the custom component is added).
iface = gr.Interface(
    _run_all_steps,
    [
        gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition"),
        gr.inputs.Textbox(lines=3, default= ("Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2"), label="Enter entity references"),
        gr.inputs.Textbox(lines=1, default= ("COVID"), label="Enter entity label"),
        gr.inputs.Textbox(lines=1, default= ("The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2."), label="Test: Enter new sentence containing named entity"),
    ],
    [
        gr.outputs.HighlightedText(),
        gr.outputs.HighlightedText(),
    ],
)
iface.launch()