"""Gradio demo: compare spaCy's stock NER with a user-defined, phrase-based entity label."""

import re
from collections import Counter
from typing import List, Tuple

import gradio as gr
import spacy
from spacy.language import Language
from spacy.matcher import PhraseMatcher
from spacy.pipeline import EntityRuler
from spacy.tokens import Span
from spacy.util import filter_spans

nlp = spacy.load("en_core_web_md")

# Entity references may be separated by commas, whitespace, or both.
_TERM_SEPARATOR = re.compile(r"[,\s]+")


def _parse_terms(raw: str) -> List[str]:
    """Split the raw entity-reference field into individual non-empty terms."""
    return [term for term in _TERM_SEPARATOR.split(raw) if term]


def load(
    txt1: str, txt2: str, txt3: str, txt4: str
) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]], Counter]:
    """Run stock NER on one text and a user-defined phrase label on another.

    Args:
        txt1: Text processed with the unmodified pipeline.
        txt2: Entity references, separated by commas and/or whitespace.
        txt3: Label assigned to every span matching one of the references.
        txt4: Text on which the user-defined label is applied.

    Returns:
        A 3-tuple of:
        - ``(text, label)`` pairs for the entities found in ``txt1``;
        - ``(text, label)`` pairs for the entities of ``txt4`` after the
          phrase matches have replaced the pipeline's own entities;
        - a ``Counter`` of the labels in that second list.
    """
    baseline_doc = nlp(txt1.strip())
    entities = [(ent.text, ent.label_) for ent in baseline_doc.ents]

    user_named = txt3.strip()
    # The matcher is rebuilt on every call and applied directly to the doc,
    # rather than being installed as a component of the shared pipeline.
    # A component added once would keep serving the first request's patterns
    # and label to every later request.
    matcher = PhraseMatcher(nlp.vocab)
    # make_doc only tokenizes, which is all a PhraseMatcher pattern needs.
    term_docs = [nlp.make_doc(term) for term in _parse_terms(txt2)]
    # Without a label or without terms there is nothing to match; the result
    # below is then an empty entity list.
    if user_named and term_docs:
        matcher.add(user_named, term_docs)

    apply_doc = nlp(txt4.strip())
    spans = [
        Span(apply_doc, start, end, label=user_named)
        for _match_id, start, end in matcher(apply_doc)
    ]
    # doc.ents rejects overlapping spans, so keep only the longest
    # non-overlapping matches. This assignment replaces the entities the
    # pipeline's own NER produced for this doc.
    apply_doc.ents = filter_spans(spans)

    entities2 = [(ent.text, ent.label_) for ent in apply_doc.ents]
    lab_counts = Counter(label for _text, label in entities2)
    return (entities, entities2, lab_counts)


description = "Use this space to produce and test your own customized NER"

iface = gr.Interface(
    title="Customized Named Entity Recognition",
    description=description,
    fn=load,
    interpretation="shap",
    inputs=[
        gr.inputs.Textbox(
            lines=10,
            default=(
                "The coronavirus disease 2019 (COVID-19) pandemic is the result of "
                "widespread infection with severe acute respiratory syndrome "
                "coronavirus 2 (SARS-CoV-2)."
            ),
            label="Text to Run through Entity Recognition",
        ),
        gr.inputs.Textbox(
            lines=3,
            default="Coronavirus, coronavirus, COVID-19, SARS-CoV-2, SARS‐CoV‐2",
            label="Enter entity references",
        ),
        gr.inputs.Textbox(lines=1, default="COVID", label="Enter entity label"),
        gr.inputs.Textbox(
            lines=10,
            default=(
                "The tissue distribution of the virus-targeted receptor protein, "
                "angiotensin converting enzyme II (ACE2), determines which organs "
                "will be attacked by SARS‐CoV‐2."
            ),
            label="Enter new sentence containing named entity",
        ),
    ],
    outputs=[
        gr.outputs.Textbox(type="str", label="Entities recognized before"),
        gr.outputs.Textbox(type="str", label="Entities recognized after"),
        gr.outputs.Textbox(
            type="str", label="Count of entities captured for new label"
        ),
    ],
    theme="dark",
)

iface.launch()