Spaces:

tdubon
/

CustomizeNER

Runtime error

App Files Files Community

CustomizeNER / ExecutableCode.py

tdubon

Upload ExecutableCode.py

12d1f22 almost 4 years ago

raw

history blame contribute delete

1.89 kB

	import gradio as gr
	import spacy
	from spacy.pipeline import EntityRuler
	from spacy.language import Language
	from spacy.matcher import PhraseMatcher
	from spacy.tokens import Span

	# Load the pretrained spaCy pipeline used for every document below.
	nlp = spacy.load("en_core_web_md")

	# First text: read from stdin and run through the unmodified pipeline.
	user_input = input()
	doc1 = nlp(user_input)

	# Print the entities captured by the pretrained model.
	for ent in doc1.ents:
	    print(ent.text, ent.label_)

	# Inspect the labels and their meaning.
	for ent in doc1.ents:
	    print(ent.label_, spacy.explain(ent.label_))

	# Use PhraseMatcher to find all references of interest.
	# Each whitespace-separated term the user enters becomes one pattern, so a
	# multi-word phrase is treated as several independent single-word terms.
	user_entries = input()  # gradio text box here to enter sample terms
	pattern_list = user_entries.split()

	# PhraseMatcher patterns must be Doc objects, so run the terms through nlp.
	patterns = list(nlp.pipe(pattern_list))
	print("patterns:", patterns)

	# Instantiate PhraseMatcher
	matcher = PhraseMatcher(nlp.vocab)

	# Create the label for the patterns. Strip the text the user typed (not the
	# prompt) so stray surrounding whitespace does not end up in the label.
	user_named = input().strip()  # gradio text box here to enter pattern label
	matcher.add(user_named, patterns)

	# Define the custom component
	@Language.component("added_component")
	def added_component_function(doc):
	    """Replace ``doc.ents`` with the spans found by the module-level ``matcher``.

	    Args:
	        doc: The spaCy ``Doc`` passed in by the pipeline.

	    Returns:
	        The same ``doc``, with its entities overwritten by the matched spans,
	        each labelled with the module-level ``user_named`` string.
	    """
	    # Apply the matcher to the doc
	    matches = matcher(doc)
	    # Create a Span for each match and assign the label
	    spans = [Span(doc, start, end, label=user_named) for _, start, end in matches]
	    # doc.ents rejects overlapping spans (e.g. when one pattern is contained
	    # in another), so keep only non-overlapping spans, preferring longer ones.
	    # Overwrite the doc.ents with the matched spans
	    doc.ents = spacy.util.filter_spans(spans)
	    return doc

	# Add the component to the pipeline after the "ner" component
	nlp.add_pipe("added_component", after="ner")
	print(nlp.pipe_names)


	# Verify that the pipeline now detects the specified terms in another text.
	# Strip the text the user typed (not the prompt) before processing it.
	user_doc = input().strip()
	apply_doc = nlp(user_doc)
	print([(ent.text, ent.label_) for ent in apply_doc.ents])

	# Count how many entities of each label were found in that text and show
	# the result; a bare Counter(...) expression in a script displays nothing.
	from collections import Counter
	labels = [ent.label_ for ent in apply_doc.ents]
	label_counts = Counter(labels)
	print(label_counts)