# Demo: extend a pretrained spaCy pipeline with a PhraseMatcher-based
# custom NER component, then verify and count the new entity label.
#
# NOTE: gradio is imported for the planned UI (see the "gradio text box"
# notes below) but is not wired up yet; EntityRuler is likewise unused so far.
import gradio as gr
import spacy
from collections import Counter

from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.pipeline import EntityRuler
from spacy.tokens import Span
from spacy.util import filter_spans

# Requires: python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")

# --- Inspect the pretrained NER on a user-supplied text --------------------
user_input = input("")
doc1 = nlp(user_input)

# Print list of entities captured by the pretrained model.
for ent in doc1.ents:
    print(ent.text, ent.label_)

# Inspect labels and their meaning.
for ent in doc1.ents:
    print(ent.label_, spacy.explain(ent.label_))

# --- Build a PhraseMatcher from user-supplied terms -------------------------
# Define the different references of interest (e.g. spellings of "Covid").
user_entries = input("")  # gradio text box here to enter sample terms
pattern_list = user_entries.strip().split()
# nlp.pipe tokenizes all terms efficiently in a single pass.
patterns = list(nlp.pipe(pattern_list))
print("patterns:", patterns)

# Instantiate PhraseMatcher.
matcher = PhraseMatcher(nlp.vocab)

# Label under which every match will be reported (e.g. "COVID").
# (Original called input(str("").strip()), stripping the empty *prompt*
# instead of the user's answer — the .strip() belongs on the result.)
user_named = input("").strip()  # gradio text box here to enter pattern label
matcher.add(user_named, patterns)


@Language.component("added_component")
def added_component_function(doc):
    """Overwrite doc.ents with every PhraseMatcher hit, labelled `user_named`."""
    matches = matcher(doc)
    spans = [
        Span(doc, start, end, label=user_named)
        for match_id, start, end in matches
    ]
    # filter_spans drops overlapping matches, which doc.ents would reject
    # with a ValueError.
    doc.ents = filter_spans(spans)
    return doc


# Add the component to the pipeline after the "ner" component.
# (Fixed syntax error: original read `nlp.add_pipe("added_component"), after="ner")`.)
nlp.add_pipe("added_component", after="ner")
print(nlp.pipe_names)

# --- Verify the extended pipeline detects the specified mentions ------------
user_doc = input("").strip()
apply_doc = nlp(user_doc)
print([(ent.text, ent.label_) for ent in apply_doc.ents])

# Count total mentions of each label (e.g. COVID) in the new document.
# (Original evaluated Counter(labels) without printing — a no-op in a script.)
labels = [ent.label_ for ent in apply_doc.ents]
print(Counter(labels))