# Demo: extend a pretrained spaCy pipeline with a PhraseMatcher-based
# custom NER component, then verify and count the new entity label.
#
# NOTE: gradio is imported for the planned UI (see the "gradio text box"
# notes below) but is not wired up yet; EntityRuler is likewise unused so far.
import gradio as gr
import spacy
from collections import Counter

from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.pipeline import EntityRuler
from spacy.tokens import Span
from spacy.util import filter_spans

# Requires: python -m spacy download en_core_web_md
nlp = spacy.load("en_core_web_md")

# --- Inspect the pretrained NER on a user-supplied text --------------------
user_input = input("")
doc1 = nlp(user_input)

# Print list of entities captured by the pretrained model.
for ent in doc1.ents:
    print(ent.text, ent.label_)

# Inspect labels and their meaning.
for ent in doc1.ents:
    print(ent.label_, spacy.explain(ent.label_))

# --- Build a PhraseMatcher from user-supplied terms -------------------------
# Define the different references of interest (e.g. spellings of "Covid").
user_entries = input("")  # gradio text box here to enter sample terms
pattern_list = user_entries.strip().split()
# nlp.pipe tokenizes all terms efficiently in a single pass.
patterns = list(nlp.pipe(pattern_list))
print("patterns:", patterns)

# Instantiate PhraseMatcher.
matcher = PhraseMatcher(nlp.vocab)

# Label under which every match will be reported (e.g. "COVID").
# (Original called input(str("").strip()), stripping the empty *prompt*
# instead of the user's answer — the .strip() belongs on the result.)
user_named = input("").strip()  # gradio text box here to enter pattern label
matcher.add(user_named, patterns)


@Language.component("added_component")
def added_component_function(doc):
    """Overwrite doc.ents with every PhraseMatcher hit, labelled `user_named`."""
    matches = matcher(doc)
    spans = [
        Span(doc, start, end, label=user_named)
        for match_id, start, end in matches
    ]
    # filter_spans drops overlapping matches, which doc.ents would reject
    # with a ValueError.
    doc.ents = filter_spans(spans)
    return doc


# Add the component to the pipeline after the "ner" component.
# (Fixed syntax error: original read `nlp.add_pipe("added_component"), after="ner")`.)
nlp.add_pipe("added_component", after="ner")
print(nlp.pipe_names)

# --- Verify the extended pipeline detects the specified mentions ------------
user_doc = input("").strip()
apply_doc = nlp(user_doc)
print([(ent.text, ent.label_) for ent in apply_doc.ents])

# Count total mentions of each label (e.g. COVID) in the new document.
# (Original evaluated Counter(labels) without printing — a no-op in a script.)
labels = [ent.label_ for ent in apply_doc.ents]
print(Counter(labels))