Spaces:

tdubon
/

CustomizeNER

Runtime error

App Files Files Community

CustomizeNER / app.py

tdubon

Create app.py

70cda52 about 3 years ago

raw

history blame

4.73 kB

	import gradio as gr
	import spacy
	from spacy.pipeline import EntityRuler
	from spacy.language import Language
	from spacy.matcher import PhraseMatcher
	from spacy.tokens import Span

	# Load the "en_core_web_md" spaCy pipeline once at import time; the helper
	# functions in this file all call this shared `nlp` object.
	nlp = spacy.load("en_core_web_md")

	#Text 1
	def process_text(text1):
	    """Run the shared pipeline on *text1* and describe every entity found.

	    Args:
	        text1: Raw text entered by the user.

	    Returns:
	        A list with one ``(entity_text, label, explanation)`` tuple per
	        entity in ``doc.ents``; ``explanation`` is whatever
	        ``spacy.explain`` returns for the label (``None`` when spaCy has no
	        description for it).
	    """
	    doc1 = load(text1)
	    # Return the data instead of printing it: print() yields None, so the
	    # caller would otherwise receive nothing useful to display.
	    return [
	        (ent.text, ent.label_, spacy.explain(ent.label_))
	        for ent in doc1.ents
	    ]


	def load(text):
	    """Strip *text* and return the Doc produced by the shared ``nlp`` pipeline.

	    ``None`` is treated as an empty string so an empty input box does not
	    raise.
	    """
	    user_input = str(text or "").strip()
	    return nlp(user_input)

	#Text 2
	def entities(text2):
	    """Build the phrase patterns for *text2* and return their texts.

	    Args:
	        text2: The entity references entered by the user.

	    Returns:
	        A list of strings, one per pattern created by ``named_ents``.
	    """
	    return [pattern.text for pattern in named_ents(text2)]


	def named_ents(text):
	    """Turn the user's entity references into PhraseMatcher patterns.

	    The input is split on commas when it contains any (so multi-word
	    references survive and no pattern ends in a stray ","); otherwise it is
	    split on whitespace, as before. Empty pieces are dropped.

	    Side effect: the resulting list is stored in the module-level name
	    ``patterns``, which is the name ``pipe`` passes to ``matcher.add``.

	    Args:
	        text: Comma- or whitespace-separated entity references.

	    Returns:
	        The list of pattern Docs (also stored in ``patterns``).
	    """
	    global patterns
	    raw = str(text or "")
	    pieces = raw.split(",") if "," in raw else raw.split()
	    pattern_list = [piece.strip() for piece in pieces if piece.strip()]
	    # Patterns only need tokenization, so make_doc is enough here.
	    patterns = [nlp.make_doc(term) for term in pattern_list]
	    return patterns

	#Text 3
	# Label -> PhraseMatcher. Pipeline components look their matcher up here at
	# call time, so re-running a label with new patterns takes effect without
	# registering or adding the component a second time.
	_label_matchers = {}


	def run(text3):
	    """Add a matcher component for label *text3* and show the pipeline.

	    Args:
	        text3: The entity label entered by the user.

	    Returns:
	        A list of ``(text, label_or_None)`` tuples, one per pipeline
	        component name, in pipeline order. The component created for this
	        label carries the label; every other entry carries ``None``. Each
	        name has a trailing space so adjacent names stay readable when the
	        tuples are rendered one after another.
	    """
	    component_name = pipe(text3)
	    label = str(text3 or "").strip()
	    return [
	        (f"{name} ", label if name == component_name else None)
	        for name in nlp.pipe_names
	    ]


	def pipe(text):
	    """Create a PhraseMatcher for label *text* and add it to the pipeline.

	    Reads the module-level ``patterns`` list. The component is registered
	    and added under the same name, ``<label>component``, placed after
	    ``"ner"``. If that component is already in the pipeline, only its
	    matcher is replaced.

	    Args:
	        text: The entity label to assign to every match.

	    Returns:
	        The name of the pipeline component for this label.

	    Raises:
	        ValueError: If the label is empty, or if no patterns are available.
	    """
	    user_named = str(text or "").strip()
	    if not user_named:
	        raise ValueError("Enter an entity label first.")

	    # `patterns` lives at module level; look it up defensively so a missing
	    # value gives a clear message instead of a NameError.
	    available_patterns = globals().get("patterns")
	    if not available_patterns:
	        raise ValueError("Enter entity references before adding a label.")

	    matcher = PhraseMatcher(nlp.vocab)
	    matcher.add(user_named, available_patterns)
	    _label_matchers[user_named] = matcher

	    component_name = user_named + "component"
	    if component_name not in nlp.pipe_names:
	        # Register under the exact name passed to add_pipe below.
	        @Language.component(component_name)
	        def custom_entity_component(doc):
	            # Apply the current matcher for this label to the doc
	            matches = _label_matchers[user_named](doc)
	            # Create a Span for each match and assign the user's label
	            spans = [
	                Span(doc, start, end, label=user_named)
	                for _match_id, start, end in matches
	            ]
	            # Overwrite doc.ents with the matched spans; overlapping spans
	            # are filtered first because doc.ents rejects overlaps.
	            doc.ents = spacy.util.filter_spans(spans)
	            return doc

	        # Add the component to the pipeline after the "ner" component
	        nlp.add_pipe(component_name, after="ner")

	    print(nlp.pipe_names)
	    return component_name

	#Text 4

	def test(text4):
	    """Run the current pipeline on *text4* and return highlightable segments.

	    Args:
	        text4: A new sentence to check against the pipeline.

	    Returns:
	        A list of ``(text, label_or_None)`` tuples that together reproduce
	        the stripped input: entity spans carry their label, the text
	        between them carries ``None``.
	    """
	    apply_doc, _label_counts = new_sample(text4)
	    segments = []
	    cursor = 0
	    for ent in apply_doc.ents:
	        if ent.start_char > cursor:
	            segments.append((apply_doc.text[cursor:ent.start_char], None))
	        segments.append((ent.text, ent.label_))
	        cursor = ent.end_char
	    # Keep whatever text follows the last entity.
	    if cursor < len(apply_doc.text):
	        segments.append((apply_doc.text[cursor:], None))
	    return segments


	def new_sample(text):
	    """Process *text* with ``nlp`` and count the entity labels it contains.

	    Prints the ``(text, label)`` pairs and the per-label counts.

	    Args:
	        text: The sentence to analyse; ``None`` is treated as empty.

	    Returns:
	        A ``(doc, counts)`` tuple: the processed Doc and a ``Counter``
	        mapping each entity label to its number of mentions.
	    """
	    from collections import Counter

	    user_doc = str(text or "").strip()
	    apply_doc = nlp(user_doc)
	    print([(ent.text, ent.label_) for ent in apply_doc.ents])
	    # Count total mentions of each label in this document
	    label_counts = Counter(ent.label_ for ent in apply_doc.ents)
	    print(label_counts)
	    return apply_doc, label_counts


	#user_input = input(str("")) #gradio text box here to enter sample text
	#doc1 = nlp(user_input)

	#print list of entities captured by pretrained model
	#for ent in doc1.ents:
	#print(ent.text, ent.label_)

	#inspect labels and their meaning
	#for ent in doc1.ents:
	#print(ent.label_, spacy.explain(ent.label_))

	#Use PhraseMatcher to find all references of interest
	#Define the different references to Covid
	#user_entries = input(str("")) #gradio text box here to enter sample terms
	#pattern_list = []

	#for i in user_entries.strip().split():
	# pattern_list.append(i)

	#patterns = list(nlp.pipe(pattern_list))
	#print("patterns:", patterns)

	#Instantiate PhraseMatcher
	#matcher = PhraseMatcher(nlp.vocab)

	#Create label for pattern
	#user_named = input(str("").strip()) #gradio text box here to enter pattern label
	#matcher.add(user_named, patterns)

	# Define the custom component
	#@Language.component("covid_component")
	#def covid_component_function(doc):
	# Apply the matcher to the doc
	# matches = matcher(doc)
	# Create a Span for each match and assign the user-supplied label
	# spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
	# Overwrite the doc.ents with the matched spans
	# doc.ents = spans
	# return doc

	# Add the component to the pipeline after the "ner" component
	#nlp.add_pipe((user_named + "component"), after="ner")
	#print(nlp.pipe_names)


	#Verify that your model now detects all specified mentions of Covid on another text
	#user_doc = input(str("").strip())
	#apply_doc = nlp(user_doc)
	#print([(ent.text, ent.label_) for ent in apply_doc.ents])

	#Count total mentions of label COVID in the 3rd document
	#from collections import Counter
	#labels = [ent.label_ for ent in apply_doc.ents]
	#Counter(labels)

	# gr.Interface wraps exactly one function, so each workflow step gets its
	# own Interface and the four are shown as tabs of a single app.
	recognize_step = gr.Interface(
	    process_text,
	    [gr.inputs.Textbox(lines=10, default="The coronavirus disease 2019 (COVID-19) pandemic is the result of widespread infection with severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2).", label="Text to Run through Entity Recognition")],
	    "text",
	)

	references_step = gr.Interface(
	    entities,
	    [gr.inputs.Textbox(lines=3, default= ("Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2"), label="Enter entity references")],
	    "text",
	)

	label_step = gr.Interface(
	    run,
	    [gr.inputs.Textbox(lines=1, default= ("COVID"), label="Enter entity label")],
	    gr.outputs.HighlightedText(),
	)

	test_step = gr.Interface(
	    test,
	    [gr.inputs.Textbox(lines=1, default= ("The tissue distribution of the virus-targeted receptor protein, angiotensin converting enzyme II (ACE2), determines which organs will be attacked by SARS‐CoV‐2."), label="Test: Enter new sentence containing named entity")],
	    gr.outputs.HighlightedText(),
	)

	# Tab order follows the order the steps are meant to be performed in.
	iface = gr.TabbedInterface(
	    [recognize_step, references_step, label_step, test_step],
	    [
	        "1. Recognize entities",
	        "2. Entity references",
	        "3. Entity label",
	        "4. Test",
	    ],
	)
	iface.launch()