CustomizeNER / ExecutableCode.py
tdubon's picture
Upload ExecutableCode.py
12d1f22
raw
history blame contribute delete
No virus
1.89 kB
import gradio as gr
import spacy
from spacy.pipeline import EntityRuler
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
# Load the pretrained English pipeline used for every document below.
nlp = spacy.load("en_core_web_md")

# First text: used only to inspect what the pretrained model finds by itself.
user_input = input()
doc1 = nlp(user_input)

# Print the list of entities captured by the pretrained model.
for ent in doc1.ents:
    print(ent.text, ent.label_)

# Inspect the labels and their meaning.
for ent in doc1.ents:
    print(ent.label_, spacy.explain(ent.label_))

# Use PhraseMatcher to find all references of interest.
# The terms are read as a single line and split on whitespace, so each
# whitespace-separated word becomes its own pattern.
user_entries = input()  # gradio text box here to enter sample terms
pattern_list = user_entries.split()
patterns = list(nlp.pipe(pattern_list))
print("patterns:", patterns)

# Instantiate PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)

# Create the label for the patterns. The typed text itself is stripped so
# that stray leading/trailing whitespace does not end up in the entity label.
user_named = input().strip()  # gradio text box here to enter pattern label
matcher.add(user_named, patterns)
# Define the custom component
@Language.component("added_component")
def added_component_function(doc):
    """Label every PhraseMatcher hit in ``doc`` as an entity.

    Runs the module-level ``matcher`` over ``doc``, wraps each match in a
    ``Span`` labelled with the module-level ``user_named``, and replaces
    ``doc.ents`` with those spans. Any entities already on the doc are
    overwritten.

    Args:
        doc: The spaCy ``Doc`` being passed through the pipeline.

    Returns:
        The same ``doc``, with ``doc.ents`` replaced by the matched spans.
    """
    # Apply the matcher to the doc
    matches = matcher(doc)
    # Create a Span for each match and assign the label
    spans = [Span(doc, start, end, label=user_named) for _, start, end in matches]
    # Assigning overlapping or duplicate spans to doc.ents raises an error,
    # so drop overlaps first (filter_spans prefers the longest span).
    doc.ents = spacy.util.filter_spans(spans)
    return doc
# Add the component to the pipeline after the "ner" component
nlp.add_pipe("added_component", after="ner")
print(nlp.pipe_names)

# Verify that the pipeline now detects the specified terms on another text.
# The typed text itself is stripped of surrounding whitespace.
user_doc = input().strip()
apply_doc = nlp(user_doc)
print([(ent.text, ent.label_) for ent in apply_doc.ents])

# Count how many entities carry each label in this document
from collections import Counter

labels = [ent.label_ for ent in apply_doc.ents]
label_counts = Counter(labels)
# A bare expression is discarded when run as a script, so print the counts.
print(label_counts)