tdubon committed on
Commit
12d1f22
1 Parent(s): 2b6ed11

Upload ExecutableCode.py

Browse files
Files changed (1) hide show
  1. ExecutableCode.py +64 -0
ExecutableCode.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spacy
3
+ from spacy.pipeline import EntityRuler
4
+ from spacy.language import Language
5
+ from spacy.matcher import PhraseMatcher
6
+ from spacy.tokens import Span
7
+
8
+ nlp = spacy.load("en_core_web_md")
9
+
10
# Read a text sample from the user and run the pretrained pipeline on it.
# (input(str("")) was a redundant wrapper — "" is already a str.)
user_input = input("")
doc1 = nlp(user_input)

# Print the list of entities captured by the pretrained model.
for ent in doc1.ents:
    print(ent.text, ent.label_)

# Inspect the labels and their meaning.
for ent in doc1.ents:
    print(ent.label_, spacy.explain(ent.label_))
20
+
21
# Use PhraseMatcher to find all references of interest.
# Define the different references to Covid.
user_entries = input("")  # gradio text box here to enter sample terms

# Whitespace-separated entries become the match patterns.
# (Replaces a manual append loop — split() already returns the list.)
pattern_list = user_entries.strip().split()

# nlp.pipe tokenizes each term into a Doc, as PhraseMatcher requires.
patterns = list(nlp.pipe(pattern_list))
print("patterns:", patterns)

# Instantiate PhraseMatcher over the pipeline's shared vocab.
matcher = PhraseMatcher(nlp.vocab)

# Create the label under which matches will be reported.
# (str("").strip() was a no-op on the empty prompt — simplified.)
user_named = input("")  # gradio text box here to enter pattern label
matcher.add(user_named, patterns)
38
+
39
# Define the custom component.
@Language.component("added_component")
def added_component_function(doc):
    """Replace ``doc.ents`` with every PhraseMatcher hit, labelled ``user_named``.

    Runs the module-level ``matcher`` over *doc* and overwrites the entity
    set entirely, so entities found by the pretrained "ner" component are
    discarded in favour of the matched spans.
    """
    # Local import keeps the file's top-level import block untouched.
    from spacy.util import filter_spans

    # Apply the matcher to the doc.
    matches = matcher(doc)
    # Create a Span for each match and assign the user-chosen label.
    spans = [Span(doc, start, end, label=user_named) for match_id, start, end in matches]
    # Overlapping spans make the doc.ents assignment raise ValueError;
    # filter_spans keeps the longest non-overlapping ones.
    doc.ents = filter_spans(spans)
    return doc
49
+
50
# Add the component to the pipeline after the "ner" component.
# BUG FIX: the original line had an unbalanced parenthesis —
#   nlp.add_pipe("added_component"), after="ner")  -> SyntaxError
nlp.add_pipe("added_component", after="ner")
print(nlp.pipe_names)


# Verify that the model now detects all specified mentions of Covid on another text.
user_doc = input("")
apply_doc = nlp(user_doc)
print([(ent.text, ent.label_) for ent in apply_doc.ents])
59
+
60
# Count total mentions of each label (e.g. COVID) in the third document.
from collections import Counter

labels = [ent.label_ for ent in apply_doc.ents]
# The bare ``Counter(labels)`` expression had no visible effect outside a
# REPL; print the tally so the counts are actually shown when run as a script.
print(Counter(labels))
64
+